/*
 *  Host code generation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"

#define NO_CPU_IO_DEFS
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd /* avoid redefinition */
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else
#include "exec/ram_addr.h"
#endif

#include "exec/cputlb.h"
#include "exec/translate-all.h"
#include "exec/translator.h"
#include "qemu/bitmap.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "qemu/cacheinfo.h"
#include "exec/log.h"
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "qapi/error.h"
#include "hw/core/tcg-cpu-ops.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal.h"

/* make various TB consistency checks */

/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page
 * @locked: whether the page is locked
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    tb_page_addr_t index;
    bool locked;
};

/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index
 * @max:    Pointer to the page in @tree with the highest page index
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    GTree *tree;
    struct page_entry *max;
};
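
/*
 * Illustration of the locking-order rule described above (hypothetical page
 * indexes, shown only as an example): if the collection already holds pages
 * 5 and 9, then @max points at page 9.  A request for page 12 can be locked
 * immediately, since 12 > 9 preserves the ascending order.  A request for
 * page 7, however, would be an out-of-order acquisition, so page_trylock_add()
 * only trylocks it and, if that lock is busy, all locks are dropped and
 * reacquired in ascending order (see page_collection_lock() below).
 */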

/*
 * In system mode we want L1_MAP to be based on ram offsets,
 * while in user mode we want it to be based on virtual addresses.
 *
 * TODO: For user mode, see the caveat re host vs guest virtual
 * address spaces near GUEST_ADDR_MAX.
 */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
#endif

/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
                  * BITS_PER_BYTE);

/*
 * L1 Mapping properties
 */
int v_l1_size;
int v_l1_shift;
int v_l2_levels;

void *l1_map[V_L1_MAX_SIZE];

TBContext tb_ctx;

static void page_table_config_init(void)
{
    uint32_t v_l1_bits;

    assert(TARGET_PAGE_BITS);
    /* The bits remaining after N lower levels of page tables. */
    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
    if (v_l1_bits < V_L1_MIN_BITS) {
        v_l1_bits += V_L2_BITS;
    }

    v_l1_size = 1 << v_l1_bits;
    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
    v_l2_levels = v_l1_shift / V_L2_BITS - 1;

    assert(v_l1_bits <= V_L1_MAX_BITS);
    assert(v_l1_shift % V_L2_BITS == 0);
    assert(v_l2_levels >= 0);
}
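
/*
 * Worked example (hypothetical numbers, shown only to illustrate the
 * arithmetic above; V_L2_BITS and V_L1_MIN_BITS are defined elsewhere and
 * are assumed here to be 10 and 4 respectively):
 *
 *   L1_MAP_ADDR_SPACE_BITS = 52, TARGET_PAGE_BITS = 12
 *   remaining bits         = 52 - 12 = 40
 *   40 % 10                = 0, which is below V_L1_MIN_BITS,
 *                            so v_l1_bits = 0 + 10 = 10
 *   v_l1_size              = 1 << 10 = 1024 entries in l1_map
 *   v_l1_shift             = 40 - 10 = 30
 *   v_l2_levels            = 30 / 10 - 1 = 2 intermediate levels
 */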

/* Encode VAL as a signed leb128 sequence at P.
   Return P incremented past the encoded value.  */
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
{
    int more, byte;

    do {
        byte = val & 0x7f;
        val >>= 7;
        more = !((val == 0 && (byte & 0x40) == 0)
                 || (val == -1 && (byte & 0x40) != 0));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);

    return p;
}

/* Decode a signed leb128 sequence at *PP; increment *PP past the
   decoded value.  Return the decoded value.  */
static target_long decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    target_long val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        val |= (target_ulong)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
        val |= -(target_ulong)1 << shift;
    }

    *pp = p;
    return val;
}

/* Encode the data collected about the instructions while compiling TB.
   Place the data at BLOCK, and return the number of bytes consumed.

   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
   which come from the target's insn_start data, followed by a uintptr_t
   which comes from the host pc of the end of the code implementing the insn.

   Each line of the table is encoded as sleb128 deltas from the previous
   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
   That is, the first column is seeded with the guest pc, the last column
   with the host pc, and the middle columns with zeros.  */

static int encode_search(TranslationBlock *tb, uint8_t *block)
{
    uint8_t *highwater = tcg_ctx->code_gen_highwater;
    uint8_t *p = block;
    int i, j, n;

    for (i = 0, n = tb->icount; i < n; ++i) {
        target_ulong prev;

        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            if (i == 0) {
                prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
            } else {
                prev = tcg_ctx->gen_insn_data[i - 1][j];
            }
            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
        }
        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);

        /* Test for (pending) buffer overflow.  The assumption is that any
           one row beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           encoding a row without having to check during encoding.  */
        if (unlikely(p > highwater)) {
            return -1;
        }
    }

    return p - block;
}

/* The cpu state corresponding to 'searched_pc' is restored.
 * When reset_icount is true, current TB will be interrupted and
 * icount should be recalculated.
 */
int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                              uintptr_t searched_pc, bool reset_icount)
{
    uint64_t data[TARGET_INSN_START_WORDS];
    uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
    const uint8_t *p = tb->tc.ptr + tb->tc.size;
    int i, j, num_insns = tb->icount;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti = profile_getclock();
#endif

    searched_pc -= GETPC_ADJ;

    if (searched_pc < host_pc) {
        return -1;
    }

    memset(data, 0, sizeof(data));
    if (!TARGET_TB_PCREL) {
        data[0] = tb_pc(tb);
    }

    /* Reconstruct the stored insn data while looking for the point at
       which the end of the insn exceeds the searched_pc.  */
    for (i = 0; i < num_insns; ++i) {
        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            data[j] += decode_sleb128(&p);
        }
        host_pc += decode_sleb128(&p);
        if (host_pc > searched_pc) {
            goto found;
        }
    }
    return -1;

 found:
    if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
        assert(icount_enabled());
        /* Reset the cycle counter to the start of the block
           and shift it to the number of actually executed instructions */
        cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
    }

    cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->restore_time,
                prof->restore_time + profile_getclock() - ti);
    qatomic_set(&prof->restore_count, prof->restore_count + 1);
#endif
    return 0;
}

bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
{
    /*
     * The pc update associated with restore without exit will
     * break the relative pc adjustments performed by TARGET_TB_PCREL.
     */
    if (TARGET_TB_PCREL) {
        assert(will_exit);
    }

    /*
     * The host_pc has to be in the rx region of the code buffer.
     * If it is not we will not be able to resolve it here.
     * The two cases where host_pc will not be correct are:
     *
     *  - fault during translation (instruction fetch)
     *  - fault from helper (not using GETPC() macro)
     *
     * Either way we need to return early as we can't resolve it here.
     */
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
            return true;
        }
    }
    return false;
}
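
/*
 * Worked example (illustrative numbers, assuming TARGET_INSN_START_WORDS == 1
 * and !TARGET_TB_PCREL): a TB at guest pc 0x1000 with two instructions at
 * 0x1000 and 0x1004, whose generated code ends at host offsets 40 and 72,
 * is encoded by encode_search() as the delta rows
 *
 *   insn 0:  pc delta = 0x1000 - 0x1000 = 0,   host delta = 40 - 0  = 40
 *   insn 1:  pc delta = 0x1004 - 0x1000 = 4,   host delta = 72 - 40 = 32
 *
 * with each delta stored as sleb128; e.g. 200 encodes to the two bytes
 * { 0xc8, 0x01 } and -3 to the single byte { 0x7d }.
 * cpu_restore_state_from_tb() above walks these rows, accumulating the
 * deltas, until the accumulated host pc passes searched_pc.
 */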

void page_init(void)
{
    page_size_init();
    page_table_config_init();

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}

PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));

    /* Level 2..N-1.  */
    for (i = v_l2_levels; i > 0; i--) {
        void **p = qatomic_rcu_read(lp);

        if (p == NULL) {
            void *existing;

            if (!alloc) {
                return NULL;
            }
            p = g_new0(void *, V_L2_SIZE);
            existing = qatomic_cmpxchg(lp, NULL, p);
            if (unlikely(existing)) {
                g_free(p);
                p = existing;
            }
        }

        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
    }

    pd = qatomic_rcu_read(lp);
    if (pd == NULL) {
        void *existing;

        if (!alloc) {
            return NULL;
        }
        pd = g_new0(PageDesc, V_L2_SIZE);
#ifndef CONFIG_USER_ONLY
        {
            int i;

            for (i = 0; i < V_L2_SIZE; i++) {
                qemu_spin_init(&pd[i].lock);
            }
        }
#endif
        existing = qatomic_cmpxchg(lp, NULL, pd);
        if (unlikely(existing)) {
#ifndef CONFIG_USER_ONLY
            {
                int i;

                for (i = 0; i < V_L2_SIZE; i++) {
                    qemu_spin_destroy(&pd[i].lock);
                }
            }
#endif
            g_free(pd);
            pd = existing;
        }
    }

    return pd + (index & (V_L2_SIZE - 1));
}
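
/*
 * Index decomposition, continuing the hypothetical configuration sketched
 * after page_table_config_init() (v_l1_shift = 30, v_l2_levels = 2, and
 * V_L2_BITS assumed to be 10): for a page index I,
 *
 *   level 1 slot  = (I >> 30) & (v_l1_size - 1)
 *   level 2 slot  = (I >> 20) & (V_L2_SIZE - 1)
 *   level 3 slot  = (I >> 10) & (V_L2_SIZE - 1)
 *   PageDesc slot =  I        & (V_L2_SIZE - 1)
 *
 * i.e. the loop above peels off V_L2_BITS of the index per level, and the
 * final V_L2_BITS select the PageDesc within the leaf block.
 */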

/* In user-mode page locks aren't used; mmap_lock is enough */
#ifdef CONFIG_USER_ONLY
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    return NULL;
}

void page_collection_unlock(struct page_collection *set)
{ }
#else /* !CONFIG_USER_ONLY */

#ifdef CONFIG_DEBUG_TCG

static __thread GHashTable *ht_pages_locked_debug;

static void ht_pages_locked_debug_init(void)
{
    if (ht_pages_locked_debug) {
        return;
    }
    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
}

static bool page_is_locked(const PageDesc *pd)
{
    PageDesc *found;

    ht_pages_locked_debug_init();
    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
    return !!found;
}

static void page_lock__debug(PageDesc *pd)
{
    ht_pages_locked_debug_init();
    g_assert(!page_is_locked(pd));
    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
}

static void page_unlock__debug(const PageDesc *pd)
{
    bool removed;

    ht_pages_locked_debug_init();
    g_assert(page_is_locked(pd));
    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
    g_assert(removed);
}

void do_assert_page_locked(const PageDesc *pd, const char *file, int line)
{
    if (unlikely(!page_is_locked(pd))) {
        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
                     pd, file, line);
        abort();
    }
}

void assert_no_pages_locked(void)
{
    ht_pages_locked_debug_init();
    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
}

#else /* !CONFIG_DEBUG_TCG */

static inline void page_lock__debug(const PageDesc *pd) { }
static inline void page_unlock__debug(const PageDesc *pd) { }

#endif /* CONFIG_DEBUG_TCG */

void page_lock(PageDesc *pd)
{
    page_lock__debug(pd);
    qemu_spin_lock(&pd->lock);
}

void page_unlock(PageDesc *pd)
{
    qemu_spin_unlock(&pd->lock);
    page_unlock__debug(pd);
}

static inline struct page_entry *
page_entry_new(PageDesc *pd, tb_page_addr_t index)
{
    struct page_entry *pe = g_malloc(sizeof(*pe));

    pe->index = index;
    pe->pd = pd;
    pe->locked = false;
    return pe;
}

static void page_entry_destroy(gpointer p)
{
    struct page_entry *pe = p;

    g_assert(pe->locked);
    page_unlock(pe->pd);
    g_free(pe);
}

/* returns false on success */
static bool page_entry_trylock(struct page_entry *pe)
{
    bool busy;

    busy = qemu_spin_trylock(&pe->pd->lock);
    if (!busy) {
        g_assert(!pe->locked);
        pe->locked = true;
        page_lock__debug(pe->pd);
    }
    return busy;
}

static void do_page_entry_lock(struct page_entry *pe)
{
    page_lock(pe->pd);
    g_assert(!pe->locked);
    pe->locked = true;
}

static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    do_page_entry_lock(pe);
    return FALSE;
}

static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    if (pe->locked) {
        pe->locked = false;
        page_unlock(pe->pd);
    }
    return FALSE;
}

/*
 * Trylock a page, and if successful, add the page to a collection.
 * Returns true ("busy") if the page could not be locked; false otherwise.
 */
static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
{
    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
    struct page_entry *pe;
    PageDesc *pd;

    pe = g_tree_lookup(set->tree, &index);
    if (pe) {
        return false;
    }

    pd = page_find(index);
    if (pd == NULL) {
        return false;
    }

    pe = page_entry_new(pd, index);
    g_tree_insert(set->tree, &pe->index, pe);

    /*
     * If this is either (1) the first insertion or (2) a page whose index
     * is higher than any other so far, just lock the page and move on.
     */
    if (set->max == NULL || pe->index > set->max->index) {
        set->max = pe;
        do_page_entry_lock(pe);
        return false;
    }
    /*
     * Try to acquire out-of-order lock; if busy, return busy so that we acquire
     * locks in order.
     */
    return page_entry_trylock(pe);
}

static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
{
    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
    tb_page_addr_t b = *(const tb_page_addr_t *)bp;

    if (a == b) {
        return 0;
    } else if (a < b) {
        return -1;
    }
    return 1;
}

/*
 * Lock a range of pages ([@start,@end[) as well as the pages of all
 * intersecting TBs.
 * Locking order: acquire locks in ascending order of page index.
 */
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    struct page_collection *set = g_malloc(sizeof(*set));
    tb_page_addr_t index;
    PageDesc *pd;

    start >>= TARGET_PAGE_BITS;
    end >>= TARGET_PAGE_BITS;
    g_assert(start <= end);

    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
                                page_entry_destroy);
    set->max = NULL;
    assert_no_pages_locked();

 retry:
    g_tree_foreach(set->tree, page_entry_lock, NULL);

    for (index = start; index <= end; index++) {
        TranslationBlock *tb;
        int n;

        pd = page_find(index);
        if (pd == NULL) {
            continue;
        }
        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
            g_tree_foreach(set->tree, page_entry_unlock, NULL);
            goto retry;
        }
        assert_page_locked(pd);
        PAGE_FOR_EACH_TB(pd, tb, n) {
            if (page_trylock_add(set, tb_page_addr0(tb)) ||
                (tb_page_addr1(tb) != -1 &&
                 page_trylock_add(set, tb_page_addr1(tb)))) {
                /* drop all locks, and reacquire in order */
                g_tree_foreach(set->tree, page_entry_unlock, NULL);
                goto retry;
            }
        }
    }
    return set;
}

void page_collection_unlock(struct page_collection *set)
{
    /* entries are unlocked and freed via page_entry_destroy */
    g_tree_destroy(set->tree);
    g_free(set);
}

#endif /* !CONFIG_USER_ONLY */
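
/*
 * Typical calling pattern (a sketch of the convention, not a copy of any
 * particular caller): code that must modify all TBs intersecting a physical
 * range first takes every affected page lock through a collection, then
 * releases them all at once:
 *
 *     struct page_collection *pages;
 *
 *     pages = page_collection_lock(start, end);
 *     ...invalidate or otherwise modify the TBs on those pages...
 *     page_collection_unlock(pages);
 *
 * In user mode the stubs above return NULL and do nothing, because the
 * mmap_lock already serializes these operations.
 */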

/* Called with mmap_lock held for user mode emulation. */
TranslationBlock *tb_gen_code(CPUState *cpu,
                              target_ulong pc, target_ulong cs_base,
                              uint32_t flags, int cflags)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb, *existing_tb;
    tb_page_addr_t phys_pc;
    tcg_insn_unit *gen_code_buf;
    int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti;
#endif
    void *host_pc;

    assert_memory_lock();
    qemu_thread_jit_write();

    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);

    if (phys_pc == -1) {
        /* Generate a one-shot TB with 1 insn in it */
        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
    }

    max_insns = cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = TCG_MAX_INSNS;
    }
    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);

 buffer_overflow:
    tb = tcg_tb_alloc(tcg_ctx);
    if (unlikely(!tb)) {
        /* flush must be done */
        tb_flush(cpu);
        mmap_unlock();
        /* Make the execution loop process the flush as soon as possible. */
        cpu->exception_index = EXCP_INTERRUPT;
        cpu_loop_exit(cpu);
    }

    gen_code_buf = tcg_ctx->code_gen_ptr;
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
#if !TARGET_TB_PCREL
    tb->pc = pc;
#endif
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    tb->trace_vcpu_dstate = *cpu->trace_dstate;
    tb_set_page_addr0(tb, phys_pc);
    tb_set_page_addr1(tb, -1);
    tcg_ctx->tb_cflags = cflags;
 tb_overflow:

#ifdef CONFIG_PROFILER
    /* includes aborted translations because of exceptions */
    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
    ti = profile_getclock();
#endif

    gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(gen_code_size != 0)) {
        goto error_return;
    }

    tcg_func_start(tcg_ctx);

    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(cpu, tb, max_insns, pc, host_pc);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    max_insns = tb->icount;

    trace_translate_block(tb, pc, tb->tc.ptr);

    /* generate machine code */
    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
    if (TCG_TARGET_HAS_direct_jump) {
        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
        tcg_ctx->tb_jmp_target_addr = NULL;
    } else {
        tcg_ctx->tb_jmp_insn_offset = NULL;
        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->tb_count, prof->tb_count + 1);
    qatomic_set(&prof->interm_time,
                prof->interm_time + profile_getclock() - ti);
    ti = profile_getclock();
#endif

    gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
    if (unlikely(gen_code_size < 0)) {
 error_return:
        switch (gen_code_size) {
        case -1:
            /*
             * Overflow of code_gen_buffer, or the current slice of it.
             *
             * TODO: We don't need to re-do gen_intermediate_code, nor
             * should we re-do the tcg optimization currently hidden
             * inside tcg_gen_code.  All that should be required is to
             * flush the TBs, allocate a new TB, re-initialize it per
             * above, and re-do the actual code generation.
             */
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation for "
                          "code_gen_buffer overflow\n");
            goto buffer_overflow;

        case -2:
            /*
             * The code generated for the TranslationBlock is too large.
             * The maximum size allowed by the unwind info is 64k.
             * There may be stricter constraints from relocations
             * in the tcg backend.
             *
             * Try again with half as many insns as we attempted this time.
             * If a single insn overflows, there's a bug somewhere...
             */
            assert(max_insns > 1);
            max_insns /= 2;
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation with "
                          "smaller translation block (max %d insns)\n",
                          max_insns);
            goto tb_overflow;

        default:
            g_assert_not_reached();
        }
    }
    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    if (unlikely(search_size < 0)) {
        goto buffer_overflow;
    }
    tb->tc.size = gen_code_size;

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
        qemu_log_in_addr_range(pc)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            int code_size, data_size;
            const tcg_target_ulong *rx_data_gen_ptr;
            size_t chunk_start;
            int insn = 0;

            if (tcg_ctx->data_gen_ptr) {
                rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
                code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
                data_size = gen_code_size - code_size;
            } else {
                rx_data_gen_ptr = 0;
                code_size = gen_code_size;
                data_size = 0;
            }

            /* Dump header and the first instruction */
            fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
            fprintf(logfile,
                    " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
                    tcg_ctx->gen_insn_data[insn][0]);
            chunk_start = tcg_ctx->gen_insn_end_off[insn];
            disas(logfile, tb->tc.ptr, chunk_start);

            /*
             * Dump each instruction chunk, wrapping up empty chunks into
             * the next instruction.  The whole array is offset so the
             * first entry is the beginning of the 2nd instruction.
             */
            while (insn < tb->icount) {
                size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
                if (chunk_end > chunk_start) {
                    fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
                            tcg_ctx->gen_insn_data[insn][0]);
                    disas(logfile, tb->tc.ptr + chunk_start,
                          chunk_end - chunk_start);
                    chunk_start = chunk_end;
                }
                insn++;
            }

            if (chunk_start < code_size) {
                fprintf(logfile, " -- tb slow paths + alignment\n");
                disas(logfile, tb->tc.ptr + chunk_start,
                      code_size - chunk_start);
            }

            /* Finally dump any data we may have after the block */
            if (data_size) {
                int i;
                fprintf(logfile, " data: [size=%d]\n", data_size);
                for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else if (sizeof(tcg_target_ulong) == 4) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else {
                        qemu_build_not_reached();
                    }
                }
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
                 CODE_GEN_ALIGN));

    /* init jump list */
    qemu_spin_init(&tb->jmp_lock);
    tb->jmp_list_head = (uintptr_t)NULL;
    tb->jmp_list_next[0] = (uintptr_t)NULL;
    tb->jmp_list_next[1] = (uintptr_t)NULL;
    tb->jmp_dest[0] = (uintptr_t)NULL;
    tb->jmp_dest[1] = (uintptr_t)NULL;

    /* init original jump addresses which have been set during tcg_gen_code() */
    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 0);
    }
    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 1);
    }

    /*
     * If the TB is not associated with a physical RAM page then it must be
     * a temporary one-insn TB, and we have nothing left to do.  Return early
     * before attempting to link to other TBs or add to the lookup table.
     */
    if (tb_page_addr0(tb) == -1) {
        return tb;
    }

    /*
     * Insert TB into the corresponding region tree before publishing it
     * through QHT.  Otherwise, a rewind that happens inside this TB might
     * fail to look it up by host PC.
     */
    tcg_tb_insert(tb);

    /*
     * No explicit memory barrier is required -- tb_link_page() makes the
     * TB visible in a consistent state.
     */
    existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
    /* if the TB already exists, discard what we just translated */
    if (unlikely(existing_tb != tb)) {
        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;

        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
        tcg_tb_remove(tb);
        return existing_tb;
    }
    return tb;
}
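
/*
 * Caller's view (a paraphrased sketch of the execution loop, not a verbatim
 * copy of cpu-exec.c): when the TB lookup misses, a new block is generated
 * under the mmap_lock and then executed:
 *
 *     tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
 *     if (tb == NULL) {
 *         mmap_lock();
 *         tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
 *         mmap_unlock();
 *     }
 *
 * Note that tb_gen_code() may hand back an already existing TB when another
 * vCPU raced us and translated the same code first (see existing_tb above).
 */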

/* user-mode: call with mmap_lock held */
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;

    assert_memory_lock();

    tb = tcg_tb_lookup(retaddr);
    if (tb) {
        /* We can use retranslation to find the PC.  */
        cpu_restore_state_from_tb(cpu, tb, retaddr, true);
        tb_phys_invalidate(tb, -1);
    } else {
        /* The exception probably happened in a helper.  The CPU state should
           have been saved before calling it.  Fetch the PC from there.  */
        CPUArchState *env = cpu->env_ptr;
        target_ulong pc, cs_base;
        tb_page_addr_t addr;
        uint32_t flags;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
        addr = get_page_addr_code(env, pc);
        if (addr != -1) {
            tb_invalidate_phys_range(addr, addr + 1);
        }
    }
}

#ifndef CONFIG_USER_ONLY
/*
 * In deterministic execution mode, instructions doing device I/Os
 * must be at the end of the TB.
 *
 * Called by softmmu_template.h, with iothread mutex not held.
 */
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;
    CPUClass *cc;
    uint32_t n;

    tb = tcg_tb_lookup(retaddr);
    if (!tb) {
        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    cpu_restore_state_from_tb(cpu, tb, retaddr, true);

    /*
     * Some guests must re-execute the branch when re-executing a delay
     * slot instruction.  When this is the case, adjust icount and N
     * to account for the re-execution of the branch.
     */
    n = 1;
    cc = CPU_GET_CLASS(cpu);
    if (cc->tcg_ops->io_recompile_replay_branch &&
        cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
        cpu_neg(cpu)->icount_decr.u16.low++;
        n = 2;
    }

    /*
     * Exit the loop and potentially generate a new TB executing just
     * the I/O insns.  We also limit instrumentation to memory
     * operations only (which execute after completion) so we don't
     * double instrument the instruction.
     */
    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;

    if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
        target_ulong pc = log_pc(cpu, tb);
        if (qemu_log_in_addr_range(pc)) {
            qemu_log("cpu_io_recompile: rewound execution of TB to "
                     TARGET_FMT_lx "\n", pc);
        }
    }

    cpu_loop_exit_noexc(cpu);
}

static void print_qht_statistics(struct qht_stats hst, GString *buf)
{
    uint32_t hgram_opts;
    size_t hgram_bins;
    char *hgram;

    if (!hst.head_buckets) {
        return;
    }
    g_string_append_printf(buf, "TB hash buckets %zu/%zu "
                           "(%0.2f%% head buckets used)\n",
                           hst.used_head_buckets, hst.head_buckets,
                           (double)hst.used_head_buckets /
                           hst.head_buckets * 100);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
        hgram_opts |= QDIST_PR_NODECIMAL;
    }
    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
    g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.occupancy) * 100, hgram);
    g_free(hgram);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
    if (hgram_bins > 10) {
        hgram_bins = 10;
    } else {
        hgram_bins = 0;
        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
    }
    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
    g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.chain), hgram);
    g_free(hgram);
}

struct tb_tree_stats {
    size_t nb_tbs;
    size_t host_size;
    size_t target_size;
    size_t max_target_size;
    size_t direct_jmp_count;
    size_t direct_jmp2_count;
    size_t cross_page;
};

static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
{
    const TranslationBlock *tb = value;
    struct tb_tree_stats *tst = data;

    tst->nb_tbs++;
    tst->host_size += tb->tc.size;
    tst->target_size += tb->size;
    if (tb->size > tst->max_target_size) {
        tst->max_target_size = tb->size;
    }
    if (tb_page_addr1(tb) != -1) {
        tst->cross_page++;
    }
    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
        tst->direct_jmp_count++;
        if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
            tst->direct_jmp2_count++;
        }
    }
    return false;
}

void dump_exec_info(GString *buf)
{
    struct tb_tree_stats tst = {};
    struct qht_stats hst;
    size_t nb_tbs, flush_full, flush_part, flush_elide;

    tcg_tb_foreach(tb_tree_stats_iter, &tst);
    nb_tbs = tst.nb_tbs;
    /* XXX: avoid using doubles ? */
    g_string_append_printf(buf, "Translation buffer state:\n");
    /*
     * Report total code size including the padding and TB structs;
     * otherwise users might think "-accel tcg,tb-size" is not honoured.
     * For avg host size we use the precise numbers from tb_tree_stats though.
     */
    g_string_append_printf(buf, "gen code size %zu/%zu\n",
                           tcg_code_size(), tcg_code_capacity());
    g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
    g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
                           nb_tbs ? tst.target_size / nb_tbs : 0,
                           tst.max_target_size);
    g_string_append_printf(buf, "TB avg host size %zu bytes "
                           "(expansion ratio: %0.1f)\n",
                           nb_tbs ? tst.host_size / nb_tbs : 0,
                           tst.target_size ?
                           (double)tst.host_size / tst.target_size : 0);
    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
                           tst.cross_page,
                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
    g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
                           "(2 jumps=%zu %zu%%)\n",
                           tst.direct_jmp_count,
                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
                           tst.direct_jmp2_count,
                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);

    qht_statistics_init(&tb_ctx.htable, &hst);
    print_qht_statistics(hst, buf);
    qht_statistics_destroy(&hst);

    g_string_append_printf(buf, "\nStatistics:\n");
    g_string_append_printf(buf, "TB flush count %u\n",
                           qatomic_read(&tb_ctx.tb_flush_count));
    g_string_append_printf(buf, "TB invalidate count %u\n",
                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));

    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
    g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
    g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
    tcg_dump_info(buf);
}

#else /* CONFIG_USER_ONLY */

void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());
    cpu->interrupt_request |= mask;
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}

/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 */
struct walk_memory_regions_data {
    walk_memory_regions_fn fn;
    void *priv;
    target_ulong start;
    int prot;
};

static int walk_memory_regions_end(struct walk_memory_regions_data *data,
                                   target_ulong end, int new_prot)
{
    if (data->start != -1u) {
        int rc = data->fn(data->priv, data->start, end, data->prot);
        if (rc != 0) {
            return rc;
        }
    }

    data->start = (new_prot ? end : -1u);
    data->prot = new_prot;

    return 0;
}

static int walk_memory_regions_1(struct walk_memory_regions_data *data,
                                 target_ulong base, int level, void **lp)
{
    target_ulong pa;
    int i, rc;

    if (*lp == NULL) {
        return walk_memory_regions_end(data, base, 0);
    }

    if (level == 0) {
        PageDesc *pd = *lp;

        for (i = 0; i < V_L2_SIZE; ++i) {
            int prot = pd[i].flags;

            pa = base | (i << TARGET_PAGE_BITS);
            if (prot != data->prot) {
                rc = walk_memory_regions_end(data, pa, prot);
                if (rc != 0) {
                    return rc;
                }
            }
        }
    } else {
        void **pp = *lp;

        for (i = 0; i < V_L2_SIZE; ++i) {
            pa = base | ((target_ulong)i <<
                (TARGET_PAGE_BITS + V_L2_BITS * level));
            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
            if (rc != 0) {
                return rc;
            }
        }
    }

    return 0;
}

int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
{
    struct walk_memory_regions_data data;
    uintptr_t i, l1_sz = v_l1_size;

    data.fn = fn;
    data.priv = priv;
    data.start = -1u;
    data.prot = 0;

    for (i = 0; i < l1_sz; i++) {
        target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
        int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
        if (rc != 0) {
            return rc;
        }
    }

    return walk_memory_regions_end(&data, 0, 0);
}

static int dump_region(void *priv, target_ulong start,
                       target_ulong end, unsigned long prot)
{
    FILE *f = (FILE *)priv;

    (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
                   " "TARGET_FMT_lx" %c%c%c\n",
                   start, end, end - start,
                   ((prot & PAGE_READ) ? 'r' : '-'),
                   ((prot & PAGE_WRITE) ? 'w' : '-'),
                   ((prot & PAGE_EXEC) ? 'x' : '-'));

    return 0;
}

/* dump memory mappings */
void page_dump(FILE *f)
{
    const int length = sizeof(target_ulong) * 2;
    (void) fprintf(f, "%-*s %-*s %-*s %s\n",
            length, "start", length, "end", length, "size", "prot");
    walk_memory_regions(f, dump_region);
}

int page_get_flags(target_ulong address)
{
    PageDesc *p;

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p) {
        return 0;
    }
    return p->flags;
}

/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY 0
#endif
#define PAGE_STICKY (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)

/* Modify the flags of a page and invalidate the code if necessary.
   The flag PAGE_WRITE_ORG is positioned automatically depending
   on PAGE_WRITE.  The mmap_lock should already be held.  */
void page_set_flags(target_ulong start, target_ulong end, int flags)
{
    target_ulong addr, len;
    bool reset, inval_tb = false;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
    assert(end - 1 <= GUEST_ADDR_MAX);
    assert(start < end);
    /* Only set PAGE_ANON with new mappings. */
    assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
    assert_memory_lock();

    start = start & TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    if (flags & PAGE_WRITE) {
        flags |= PAGE_WRITE_ORG;
    }
    reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
    if (reset) {
        page_reset_target_data(start, end);
    }
    flags &= ~PAGE_RESET;

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);

        /*
         * If the page was executable, but is reset, or is no longer
         * executable, or has become writable, then invalidate any code.
         */
        if ((p->flags & PAGE_EXEC)
            && (reset ||
                !(flags & PAGE_EXEC) ||
                (flags & ~p->flags & PAGE_WRITE))) {
            inval_tb = true;
        }
        /* Using mprotect on a page does not change sticky bits. */
        p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags;
    }

    if (inval_tb) {
        tb_invalidate_phys_range(start, end);
    }
}
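
/*
 * Example (hypothetical flag combinations, for illustration only): a fresh
 * anonymous PROT_READ|PROT_WRITE mapping would typically be recorded as
 *
 *     page_set_flags(start, end,
 *                    PAGE_VALID | PAGE_RESET | PAGE_ANON |
 *                    PAGE_READ | PAGE_WRITE);
 *
 * where PAGE_WRITE also sets PAGE_WRITE_ORG.  A later mprotect() of the same
 * range to read-only would be recorded as
 *
 *     page_set_flags(start, end, PAGE_VALID | PAGE_READ);
 *
 * which keeps the sticky PAGE_ANON bit but drops PAGE_WRITE and
 * PAGE_WRITE_ORG, since the guest itself revoked write permission.
 */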

int page_check_range(target_ulong start, target_ulong len, int flags)
{
    PageDesc *p;
    target_ulong end;
    target_ulong addr;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
    if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
        assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
    }

    if (len == 0) {
        return 0;
    }
    if (start + len - 1 < start) {
        /* We've wrapped around.  */
        return -1;
    }

    /* must do before we lose bits in the next step */
    end = TARGET_PAGE_ALIGN(start + len);
    start = start & TARGET_PAGE_MASK;

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        p = page_find(addr >> TARGET_PAGE_BITS);
        if (!p) {
            return -1;
        }
        if (!(p->flags & PAGE_VALID)) {
            return -1;
        }

        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
            return -1;
        }
        if (flags & PAGE_WRITE) {
            if (!(p->flags & PAGE_WRITE_ORG)) {
                return -1;
            }
            /* unprotect the page if it was put read-only because it
               contains translated code */
            if (!(p->flags & PAGE_WRITE)) {
                if (!page_unprotect(addr, 0)) {
                    return -1;
                }
            }
        }
    }
    return 0;
}
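
/*
 * Typical use (a sketch of the caller pattern, not a copy of any particular
 * caller): syscall emulation validates a guest buffer before touching it
 * through the host mapping, e.g.
 *
 *     if (page_check_range(guest_addr, size, PAGE_READ | PAGE_WRITE) < 0) {
 *         return -TARGET_EFAULT;
 *     }
 *     memcpy(g2h_untagged(guest_addr), data, size);
 *
 * Note that checking PAGE_WRITE may call page_unprotect() below, so the
 * check itself can invalidate TBs that live on those pages.
 */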

void page_protect(tb_page_addr_t page_addr)
{
    target_ulong addr;
    PageDesc *p;
    int prot;

    p = page_find(page_addr >> TARGET_PAGE_BITS);
    if (p && (p->flags & PAGE_WRITE)) {
        /*
         * Force the host page as non-writable (writes will have a page fault +
         * mprotect overhead).
         */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
             addr += TARGET_PAGE_SIZE) {

            p = page_find(addr >> TARGET_PAGE_BITS);
            if (!p) {
                continue;
            }
            prot |= p->flags;
            p->flags &= ~PAGE_WRITE;
        }
        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
    }
}

/* called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 */
int page_unprotect(target_ulong address, uintptr_t pc)
{
    unsigned int prot;
    bool current_tb_invalidated;
    PageDesc *p;
    target_ulong host_start, host_end, addr;

    /* Technically this isn't safe inside a signal handler.  However we
       know this only ever happens in a synchronous SEGV handler, so in
       practice it seems to be ok.  */
    mmap_lock();

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p) {
        mmap_unlock();
        return 0;
    }

    /* if the page was really writable, then we change its
       protection back to writable */
    if (p->flags & PAGE_WRITE_ORG) {
        current_tb_invalidated = false;
        if (p->flags & PAGE_WRITE) {
            /* If the page is actually marked WRITE then assume this is because
             * this thread raced with another one which got here first and
             * set the page to PAGE_WRITE and did the TB invalidate for us.
             */
#ifdef TARGET_HAS_PRECISE_SMC
            TranslationBlock *current_tb = tcg_tb_lookup(pc);
            if (current_tb) {
                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
            }
#endif
        } else {
            host_start = address & qemu_host_page_mask;
            host_end = host_start + qemu_host_page_size;

            prot = 0;
            for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
                p = page_find(addr >> TARGET_PAGE_BITS);
                p->flags |= PAGE_WRITE;
                prot |= p->flags;

                /* and since the content will be modified, we must invalidate
                   the corresponding translated code. */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(addr, pc);
            }
            mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
                     prot & PAGE_BITS);
        }
        mmap_unlock();
        /* If current TB was invalidated return to main loop */
        return current_tb_invalidated ? 2 : 1;
    }
    mmap_unlock();
    return 0;
}
#endif /* CONFIG_USER_ONLY */

/*
 * Called by generic code at e.g. cpu reset after cpu creation,
 * therefore we must be prepared to allocate the jump cache.
 */
void tcg_flush_jmp_cache(CPUState *cpu)
{
    CPUJumpCache *jc = cpu->tb_jmp_cache;

    if (likely(jc)) {
        for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
            qatomic_set(&jc->array[i].tb, NULL);
        }
    } else {
        /* This should happen once during realize, and thus never race. */
        jc = g_new0(CPUJumpCache, 1);
        jc = qatomic_xchg(&cpu->tb_jmp_cache, jc);
        assert(jc == NULL);
    }
}

/* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
void tcg_flush_softmmu_tlb(CPUState *cs)
{
#ifdef CONFIG_SOFTMMU
    tlb_flush(cs);
#endif
}