1 /* 2 * Host code generation 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 22 #define NO_CPU_IO_DEFS 23 #include "trace.h" 24 #include "disas/disas.h" 25 #include "exec/exec-all.h" 26 #include "tcg/tcg.h" 27 #if defined(CONFIG_USER_ONLY) 28 #include "qemu.h" 29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 30 #include <sys/param.h> 31 #if __FreeBSD_version >= 700104 32 #define HAVE_KINFO_GETVMMAP 33 #define sigqueue sigqueue_freebsd /* avoid redefinition */ 34 #include <sys/proc.h> 35 #include <machine/profile.h> 36 #define _KERNEL 37 #include <sys/user.h> 38 #undef _KERNEL 39 #undef sigqueue 40 #include <libutil.h> 41 #endif 42 #endif 43 #else 44 #include "exec/ram_addr.h" 45 #endif 46 47 #include "exec/cputlb.h" 48 #include "exec/translate-all.h" 49 #include "exec/translator.h" 50 #include "qemu/bitmap.h" 51 #include "qemu/qemu-print.h" 52 #include "qemu/timer.h" 53 #include "qemu/main-loop.h" 54 #include "qemu/cacheinfo.h" 55 #include "exec/log.h" 56 #include "sysemu/cpus.h" 57 #include "sysemu/cpu-timers.h" 58 #include "sysemu/tcg.h" 59 #include "qapi/error.h" 60 #include "hw/core/tcg-cpu-ops.h" 61 #include "tb-jmp-cache.h" 62 #include "tb-hash.h" 63 #include "tb-context.h" 64 #include "internal.h" 65 66 /* make various TB consistency checks */ 67 68 /** 69 * struct page_entry - page descriptor entry 70 * @pd: pointer to the &struct PageDesc of the page this entry represents 71 * @index: page index of the page 72 * @locked: whether the page is locked 73 * 74 * This struct helps us keep track of the locked state of a page, without 75 * bloating &struct PageDesc. 76 * 77 * A page lock protects accesses to all fields of &struct PageDesc. 78 * 79 * See also: &struct page_collection. 80 */ 81 struct page_entry { 82 PageDesc *pd; 83 tb_page_addr_t index; 84 bool locked; 85 }; 86 87 /** 88 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's) 89 * @tree: Binary search tree (BST) of the pages, with key == page index 90 * @max: Pointer to the page in @tree with the highest page index 91 * 92 * To avoid deadlock we lock pages in ascending order of page index. 93 * When operating on a set of pages, we need to keep track of them so that 94 * we can lock them in order and also unlock them later. For this we collect 95 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the 96 * @tree implementation we use does not provide an O(1) operation to obtain the 97 * highest-ranked element, we use @max to keep track of the inserted page 98 * with the highest index. This is valuable because if a page is not in 99 * the tree and its index is higher than @max's, then we can lock it 100 * without breaking the locking order rule. 101 * 102 * Note on naming: 'struct page_set' would be shorter, but we already have a few 103 * page_set_*() helpers, so page_collection is used instead to avoid confusion. 104 * 105 * See also: page_collection_lock(). 106 */ 107 struct page_collection { 108 GTree *tree; 109 struct page_entry *max; 110 }; 111 112 /* 113 * In system mode we want L1_MAP to be based on ram offsets, 114 * while in user mode we want it to be based on virtual addresses. 115 * 116 * TODO: For user mode, see the caveat re host vs guest virtual 117 * address spaces near GUEST_ADDR_MAX. 118 */ 119 #if !defined(CONFIG_USER_ONLY) 120 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS 121 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS 122 #else 123 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS 124 #endif 125 #else 126 # define L1_MAP_ADDR_SPACE_BITS MIN(HOST_LONG_BITS, TARGET_ABI_BITS) 127 #endif 128 129 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */ 130 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS > 131 sizeof_field(TranslationBlock, trace_vcpu_dstate) 132 * BITS_PER_BYTE); 133 134 /* 135 * L1 Mapping properties 136 */ 137 int v_l1_size; 138 int v_l1_shift; 139 int v_l2_levels; 140 141 void *l1_map[V_L1_MAX_SIZE]; 142 143 TBContext tb_ctx; 144 145 static void page_table_config_init(void) 146 { 147 uint32_t v_l1_bits; 148 149 assert(TARGET_PAGE_BITS); 150 /* The bits remaining after N lower levels of page tables. */ 151 v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS; 152 if (v_l1_bits < V_L1_MIN_BITS) { 153 v_l1_bits += V_L2_BITS; 154 } 155 156 v_l1_size = 1 << v_l1_bits; 157 v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits; 158 v_l2_levels = v_l1_shift / V_L2_BITS - 1; 159 160 assert(v_l1_bits <= V_L1_MAX_BITS); 161 assert(v_l1_shift % V_L2_BITS == 0); 162 assert(v_l2_levels >= 0); 163 } 164 165 /* Encode VAL as a signed leb128 sequence at P. 166 Return P incremented past the encoded value. */ 167 static uint8_t *encode_sleb128(uint8_t *p, target_long val) 168 { 169 int more, byte; 170 171 do { 172 byte = val & 0x7f; 173 val >>= 7; 174 more = !((val == 0 && (byte & 0x40) == 0) 175 || (val == -1 && (byte & 0x40) != 0)); 176 if (more) { 177 byte |= 0x80; 178 } 179 *p++ = byte; 180 } while (more); 181 182 return p; 183 } 184 185 /* Decode a signed leb128 sequence at *PP; increment *PP past the 186 decoded value. Return the decoded value. */ 187 static target_long decode_sleb128(const uint8_t **pp) 188 { 189 const uint8_t *p = *pp; 190 target_long val = 0; 191 int byte, shift = 0; 192 193 do { 194 byte = *p++; 195 val |= (target_ulong)(byte & 0x7f) << shift; 196 shift += 7; 197 } while (byte & 0x80); 198 if (shift < TARGET_LONG_BITS && (byte & 0x40)) { 199 val |= -(target_ulong)1 << shift; 200 } 201 202 *pp = p; 203 return val; 204 } 205 206 /* Encode the data collected about the instructions while compiling TB. 207 Place the data at BLOCK, and return the number of bytes consumed. 208 209 The logical table consists of TARGET_INSN_START_WORDS target_ulong's, 210 which come from the target's insn_start data, followed by a uintptr_t 211 which comes from the host pc of the end of the code implementing the insn. 212 213 Each line of the table is encoded as sleb128 deltas from the previous 214 line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }. 215 That is, the first column is seeded with the guest pc, the last column 216 with the host pc, and the middle columns with zeros. */ 217 218 static int encode_search(TranslationBlock *tb, uint8_t *block) 219 { 220 uint8_t *highwater = tcg_ctx->code_gen_highwater; 221 uint8_t *p = block; 222 int i, j, n; 223 224 for (i = 0, n = tb->icount; i < n; ++i) { 225 target_ulong prev; 226 227 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { 228 if (i == 0) { 229 prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0); 230 } else { 231 prev = tcg_ctx->gen_insn_data[i - 1][j]; 232 } 233 p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev); 234 } 235 prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]); 236 p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev); 237 238 /* Test for (pending) buffer overflow. The assumption is that any 239 one row beginning below the high water mark cannot overrun 240 the buffer completely. Thus we can test for overflow after 241 encoding a row without having to check during encoding. */ 242 if (unlikely(p > highwater)) { 243 return -1; 244 } 245 } 246 247 return p - block; 248 } 249 250 /* The cpu state corresponding to 'searched_pc' is restored. 251 * When reset_icount is true, current TB will be interrupted and 252 * icount should be recalculated. 253 */ 254 int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, 255 uintptr_t searched_pc, bool reset_icount) 256 { 257 target_ulong data[TARGET_INSN_START_WORDS]; 258 uintptr_t host_pc = (uintptr_t)tb->tc.ptr; 259 const uint8_t *p = tb->tc.ptr + tb->tc.size; 260 int i, j, num_insns = tb->icount; 261 #ifdef CONFIG_PROFILER 262 TCGProfile *prof = &tcg_ctx->prof; 263 int64_t ti = profile_getclock(); 264 #endif 265 266 searched_pc -= GETPC_ADJ; 267 268 if (searched_pc < host_pc) { 269 return -1; 270 } 271 272 memset(data, 0, sizeof(data)); 273 if (!TARGET_TB_PCREL) { 274 data[0] = tb_pc(tb); 275 } 276 277 /* Reconstruct the stored insn data while looking for the point at 278 which the end of the insn exceeds the searched_pc. */ 279 for (i = 0; i < num_insns; ++i) { 280 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { 281 data[j] += decode_sleb128(&p); 282 } 283 host_pc += decode_sleb128(&p); 284 if (host_pc > searched_pc) { 285 goto found; 286 } 287 } 288 return -1; 289 290 found: 291 if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) { 292 assert(icount_enabled()); 293 /* Reset the cycle counter to the start of the block 294 and shift if to the number of actually executed instructions */ 295 cpu_neg(cpu)->icount_decr.u16.low += num_insns - i; 296 } 297 298 { 299 const struct TCGCPUOps *ops = cpu->cc->tcg_ops; 300 __typeof(ops->restore_state_to_opc) restore = ops->restore_state_to_opc; 301 if (restore) { 302 uint64_t d64[TARGET_INSN_START_WORDS]; 303 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 304 d64[i] = data[i]; 305 } 306 restore(cpu, tb, d64); 307 } else { 308 restore_state_to_opc(cpu->env_ptr, tb, data); 309 } 310 } 311 312 #ifdef CONFIG_PROFILER 313 qatomic_set(&prof->restore_time, 314 prof->restore_time + profile_getclock() - ti); 315 qatomic_set(&prof->restore_count, prof->restore_count + 1); 316 #endif 317 return 0; 318 } 319 320 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) 321 { 322 /* 323 * The pc update associated with restore without exit will 324 * break the relative pc adjustments performed by TARGET_TB_PCREL. 325 */ 326 if (TARGET_TB_PCREL) { 327 assert(will_exit); 328 } 329 330 /* 331 * The host_pc has to be in the rx region of the code buffer. 332 * If it is not we will not be able to resolve it here. 333 * The two cases where host_pc will not be correct are: 334 * 335 * - fault during translation (instruction fetch) 336 * - fault from helper (not using GETPC() macro) 337 * 338 * Either way we need return early as we can't resolve it here. 339 */ 340 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { 341 TranslationBlock *tb = tcg_tb_lookup(host_pc); 342 if (tb) { 343 cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit); 344 return true; 345 } 346 } 347 return false; 348 } 349 350 void page_init(void) 351 { 352 page_size_init(); 353 page_table_config_init(); 354 355 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) 356 { 357 #ifdef HAVE_KINFO_GETVMMAP 358 struct kinfo_vmentry *freep; 359 int i, cnt; 360 361 freep = kinfo_getvmmap(getpid(), &cnt); 362 if (freep) { 363 mmap_lock(); 364 for (i = 0; i < cnt; i++) { 365 unsigned long startaddr, endaddr; 366 367 startaddr = freep[i].kve_start; 368 endaddr = freep[i].kve_end; 369 if (h2g_valid(startaddr)) { 370 startaddr = h2g(startaddr) & TARGET_PAGE_MASK; 371 372 if (h2g_valid(endaddr)) { 373 endaddr = h2g(endaddr); 374 page_set_flags(startaddr, endaddr, PAGE_RESERVED); 375 } else { 376 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS 377 endaddr = ~0ul; 378 page_set_flags(startaddr, endaddr, PAGE_RESERVED); 379 #endif 380 } 381 } 382 } 383 free(freep); 384 mmap_unlock(); 385 } 386 #else 387 FILE *f; 388 389 last_brk = (unsigned long)sbrk(0); 390 391 f = fopen("/compat/linux/proc/self/maps", "r"); 392 if (f) { 393 mmap_lock(); 394 395 do { 396 unsigned long startaddr, endaddr; 397 int n; 398 399 n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr); 400 401 if (n == 2 && h2g_valid(startaddr)) { 402 startaddr = h2g(startaddr) & TARGET_PAGE_MASK; 403 404 if (h2g_valid(endaddr)) { 405 endaddr = h2g(endaddr); 406 } else { 407 endaddr = ~0ul; 408 } 409 page_set_flags(startaddr, endaddr, PAGE_RESERVED); 410 } 411 } while (!feof(f)); 412 413 fclose(f); 414 mmap_unlock(); 415 } 416 #endif 417 } 418 #endif 419 } 420 421 PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc) 422 { 423 PageDesc *pd; 424 void **lp; 425 int i; 426 427 /* Level 1. Always allocated. */ 428 lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1)); 429 430 /* Level 2..N-1. */ 431 for (i = v_l2_levels; i > 0; i--) { 432 void **p = qatomic_rcu_read(lp); 433 434 if (p == NULL) { 435 void *existing; 436 437 if (!alloc) { 438 return NULL; 439 } 440 p = g_new0(void *, V_L2_SIZE); 441 existing = qatomic_cmpxchg(lp, NULL, p); 442 if (unlikely(existing)) { 443 g_free(p); 444 p = existing; 445 } 446 } 447 448 lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1)); 449 } 450 451 pd = qatomic_rcu_read(lp); 452 if (pd == NULL) { 453 void *existing; 454 455 if (!alloc) { 456 return NULL; 457 } 458 pd = g_new0(PageDesc, V_L2_SIZE); 459 #ifndef CONFIG_USER_ONLY 460 { 461 int i; 462 463 for (i = 0; i < V_L2_SIZE; i++) { 464 qemu_spin_init(&pd[i].lock); 465 } 466 } 467 #endif 468 existing = qatomic_cmpxchg(lp, NULL, pd); 469 if (unlikely(existing)) { 470 #ifndef CONFIG_USER_ONLY 471 { 472 int i; 473 474 for (i = 0; i < V_L2_SIZE; i++) { 475 qemu_spin_destroy(&pd[i].lock); 476 } 477 } 478 #endif 479 g_free(pd); 480 pd = existing; 481 } 482 } 483 484 return pd + (index & (V_L2_SIZE - 1)); 485 } 486 487 /* In user-mode page locks aren't used; mmap_lock is enough */ 488 #ifdef CONFIG_USER_ONLY 489 struct page_collection * 490 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end) 491 { 492 return NULL; 493 } 494 495 void page_collection_unlock(struct page_collection *set) 496 { } 497 #else /* !CONFIG_USER_ONLY */ 498 499 #ifdef CONFIG_DEBUG_TCG 500 501 static __thread GHashTable *ht_pages_locked_debug; 502 503 static void ht_pages_locked_debug_init(void) 504 { 505 if (ht_pages_locked_debug) { 506 return; 507 } 508 ht_pages_locked_debug = g_hash_table_new(NULL, NULL); 509 } 510 511 static bool page_is_locked(const PageDesc *pd) 512 { 513 PageDesc *found; 514 515 ht_pages_locked_debug_init(); 516 found = g_hash_table_lookup(ht_pages_locked_debug, pd); 517 return !!found; 518 } 519 520 static void page_lock__debug(PageDesc *pd) 521 { 522 ht_pages_locked_debug_init(); 523 g_assert(!page_is_locked(pd)); 524 g_hash_table_insert(ht_pages_locked_debug, pd, pd); 525 } 526 527 static void page_unlock__debug(const PageDesc *pd) 528 { 529 bool removed; 530 531 ht_pages_locked_debug_init(); 532 g_assert(page_is_locked(pd)); 533 removed = g_hash_table_remove(ht_pages_locked_debug, pd); 534 g_assert(removed); 535 } 536 537 void do_assert_page_locked(const PageDesc *pd, const char *file, int line) 538 { 539 if (unlikely(!page_is_locked(pd))) { 540 error_report("assert_page_lock: PageDesc %p not locked @ %s:%d", 541 pd, file, line); 542 abort(); 543 } 544 } 545 546 void assert_no_pages_locked(void) 547 { 548 ht_pages_locked_debug_init(); 549 g_assert(g_hash_table_size(ht_pages_locked_debug) == 0); 550 } 551 552 #else /* !CONFIG_DEBUG_TCG */ 553 554 static inline void page_lock__debug(const PageDesc *pd) { } 555 static inline void page_unlock__debug(const PageDesc *pd) { } 556 557 #endif /* CONFIG_DEBUG_TCG */ 558 559 void page_lock(PageDesc *pd) 560 { 561 page_lock__debug(pd); 562 qemu_spin_lock(&pd->lock); 563 } 564 565 void page_unlock(PageDesc *pd) 566 { 567 qemu_spin_unlock(&pd->lock); 568 page_unlock__debug(pd); 569 } 570 571 static inline struct page_entry * 572 page_entry_new(PageDesc *pd, tb_page_addr_t index) 573 { 574 struct page_entry *pe = g_malloc(sizeof(*pe)); 575 576 pe->index = index; 577 pe->pd = pd; 578 pe->locked = false; 579 return pe; 580 } 581 582 static void page_entry_destroy(gpointer p) 583 { 584 struct page_entry *pe = p; 585 586 g_assert(pe->locked); 587 page_unlock(pe->pd); 588 g_free(pe); 589 } 590 591 /* returns false on success */ 592 static bool page_entry_trylock(struct page_entry *pe) 593 { 594 bool busy; 595 596 busy = qemu_spin_trylock(&pe->pd->lock); 597 if (!busy) { 598 g_assert(!pe->locked); 599 pe->locked = true; 600 page_lock__debug(pe->pd); 601 } 602 return busy; 603 } 604 605 static void do_page_entry_lock(struct page_entry *pe) 606 { 607 page_lock(pe->pd); 608 g_assert(!pe->locked); 609 pe->locked = true; 610 } 611 612 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data) 613 { 614 struct page_entry *pe = value; 615 616 do_page_entry_lock(pe); 617 return FALSE; 618 } 619 620 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data) 621 { 622 struct page_entry *pe = value; 623 624 if (pe->locked) { 625 pe->locked = false; 626 page_unlock(pe->pd); 627 } 628 return FALSE; 629 } 630 631 /* 632 * Trylock a page, and if successful, add the page to a collection. 633 * Returns true ("busy") if the page could not be locked; false otherwise. 634 */ 635 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr) 636 { 637 tb_page_addr_t index = addr >> TARGET_PAGE_BITS; 638 struct page_entry *pe; 639 PageDesc *pd; 640 641 pe = g_tree_lookup(set->tree, &index); 642 if (pe) { 643 return false; 644 } 645 646 pd = page_find(index); 647 if (pd == NULL) { 648 return false; 649 } 650 651 pe = page_entry_new(pd, index); 652 g_tree_insert(set->tree, &pe->index, pe); 653 654 /* 655 * If this is either (1) the first insertion or (2) a page whose index 656 * is higher than any other so far, just lock the page and move on. 657 */ 658 if (set->max == NULL || pe->index > set->max->index) { 659 set->max = pe; 660 do_page_entry_lock(pe); 661 return false; 662 } 663 /* 664 * Try to acquire out-of-order lock; if busy, return busy so that we acquire 665 * locks in order. 666 */ 667 return page_entry_trylock(pe); 668 } 669 670 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata) 671 { 672 tb_page_addr_t a = *(const tb_page_addr_t *)ap; 673 tb_page_addr_t b = *(const tb_page_addr_t *)bp; 674 675 if (a == b) { 676 return 0; 677 } else if (a < b) { 678 return -1; 679 } 680 return 1; 681 } 682 683 /* 684 * Lock a range of pages ([@start,@end[) as well as the pages of all 685 * intersecting TBs. 686 * Locking order: acquire locks in ascending order of page index. 687 */ 688 struct page_collection * 689 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end) 690 { 691 struct page_collection *set = g_malloc(sizeof(*set)); 692 tb_page_addr_t index; 693 PageDesc *pd; 694 695 start >>= TARGET_PAGE_BITS; 696 end >>= TARGET_PAGE_BITS; 697 g_assert(start <= end); 698 699 set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL, 700 page_entry_destroy); 701 set->max = NULL; 702 assert_no_pages_locked(); 703 704 retry: 705 g_tree_foreach(set->tree, page_entry_lock, NULL); 706 707 for (index = start; index <= end; index++) { 708 TranslationBlock *tb; 709 int n; 710 711 pd = page_find(index); 712 if (pd == NULL) { 713 continue; 714 } 715 if (page_trylock_add(set, index << TARGET_PAGE_BITS)) { 716 g_tree_foreach(set->tree, page_entry_unlock, NULL); 717 goto retry; 718 } 719 assert_page_locked(pd); 720 PAGE_FOR_EACH_TB(pd, tb, n) { 721 if (page_trylock_add(set, tb_page_addr0(tb)) || 722 (tb_page_addr1(tb) != -1 && 723 page_trylock_add(set, tb_page_addr1(tb)))) { 724 /* drop all locks, and reacquire in order */ 725 g_tree_foreach(set->tree, page_entry_unlock, NULL); 726 goto retry; 727 } 728 } 729 } 730 return set; 731 } 732 733 void page_collection_unlock(struct page_collection *set) 734 { 735 /* entries are unlocked and freed via page_entry_destroy */ 736 g_tree_destroy(set->tree); 737 g_free(set); 738 } 739 740 #endif /* !CONFIG_USER_ONLY */ 741 742 /* Called with mmap_lock held for user mode emulation. */ 743 TranslationBlock *tb_gen_code(CPUState *cpu, 744 target_ulong pc, target_ulong cs_base, 745 uint32_t flags, int cflags) 746 { 747 CPUArchState *env = cpu->env_ptr; 748 TranslationBlock *tb, *existing_tb; 749 tb_page_addr_t phys_pc; 750 tcg_insn_unit *gen_code_buf; 751 int gen_code_size, search_size, max_insns; 752 #ifdef CONFIG_PROFILER 753 TCGProfile *prof = &tcg_ctx->prof; 754 int64_t ti; 755 #endif 756 void *host_pc; 757 758 assert_memory_lock(); 759 qemu_thread_jit_write(); 760 761 phys_pc = get_page_addr_code_hostp(env, pc, &host_pc); 762 763 if (phys_pc == -1) { 764 /* Generate a one-shot TB with 1 insn in it */ 765 cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1; 766 } 767 768 max_insns = cflags & CF_COUNT_MASK; 769 if (max_insns == 0) { 770 max_insns = TCG_MAX_INSNS; 771 } 772 QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS); 773 774 buffer_overflow: 775 tb = tcg_tb_alloc(tcg_ctx); 776 if (unlikely(!tb)) { 777 /* flush must be done */ 778 tb_flush(cpu); 779 mmap_unlock(); 780 /* Make the execution loop process the flush as soon as possible. */ 781 cpu->exception_index = EXCP_INTERRUPT; 782 cpu_loop_exit(cpu); 783 } 784 785 gen_code_buf = tcg_ctx->code_gen_ptr; 786 tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); 787 #if !TARGET_TB_PCREL 788 tb->pc = pc; 789 #endif 790 tb->cs_base = cs_base; 791 tb->flags = flags; 792 tb->cflags = cflags; 793 tb->trace_vcpu_dstate = *cpu->trace_dstate; 794 tb_set_page_addr0(tb, phys_pc); 795 tb_set_page_addr1(tb, -1); 796 tcg_ctx->tb_cflags = cflags; 797 tb_overflow: 798 799 #ifdef CONFIG_PROFILER 800 /* includes aborted translations because of exceptions */ 801 qatomic_set(&prof->tb_count1, prof->tb_count1 + 1); 802 ti = profile_getclock(); 803 #endif 804 805 gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0); 806 if (unlikely(gen_code_size != 0)) { 807 goto error_return; 808 } 809 810 tcg_func_start(tcg_ctx); 811 812 tcg_ctx->cpu = env_cpu(env); 813 gen_intermediate_code(cpu, tb, max_insns, pc, host_pc); 814 assert(tb->size != 0); 815 tcg_ctx->cpu = NULL; 816 max_insns = tb->icount; 817 818 trace_translate_block(tb, pc, tb->tc.ptr); 819 820 /* generate machine code */ 821 tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID; 822 tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID; 823 tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset; 824 if (TCG_TARGET_HAS_direct_jump) { 825 tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg; 826 tcg_ctx->tb_jmp_target_addr = NULL; 827 } else { 828 tcg_ctx->tb_jmp_insn_offset = NULL; 829 tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg; 830 } 831 832 #ifdef CONFIG_PROFILER 833 qatomic_set(&prof->tb_count, prof->tb_count + 1); 834 qatomic_set(&prof->interm_time, 835 prof->interm_time + profile_getclock() - ti); 836 ti = profile_getclock(); 837 #endif 838 839 gen_code_size = tcg_gen_code(tcg_ctx, tb, pc); 840 if (unlikely(gen_code_size < 0)) { 841 error_return: 842 switch (gen_code_size) { 843 case -1: 844 /* 845 * Overflow of code_gen_buffer, or the current slice of it. 846 * 847 * TODO: We don't need to re-do gen_intermediate_code, nor 848 * should we re-do the tcg optimization currently hidden 849 * inside tcg_gen_code. All that should be required is to 850 * flush the TBs, allocate a new TB, re-initialize it per 851 * above, and re-do the actual code generation. 852 */ 853 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, 854 "Restarting code generation for " 855 "code_gen_buffer overflow\n"); 856 goto buffer_overflow; 857 858 case -2: 859 /* 860 * The code generated for the TranslationBlock is too large. 861 * The maximum size allowed by the unwind info is 64k. 862 * There may be stricter constraints from relocations 863 * in the tcg backend. 864 * 865 * Try again with half as many insns as we attempted this time. 866 * If a single insn overflows, there's a bug somewhere... 867 */ 868 assert(max_insns > 1); 869 max_insns /= 2; 870 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, 871 "Restarting code generation with " 872 "smaller translation block (max %d insns)\n", 873 max_insns); 874 goto tb_overflow; 875 876 default: 877 g_assert_not_reached(); 878 } 879 } 880 search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size); 881 if (unlikely(search_size < 0)) { 882 goto buffer_overflow; 883 } 884 tb->tc.size = gen_code_size; 885 886 #ifdef CONFIG_PROFILER 887 qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); 888 qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size); 889 qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size); 890 qatomic_set(&prof->search_out_len, prof->search_out_len + search_size); 891 #endif 892 893 #ifdef DEBUG_DISAS 894 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && 895 qemu_log_in_addr_range(pc)) { 896 FILE *logfile = qemu_log_trylock(); 897 if (logfile) { 898 int code_size, data_size; 899 const tcg_target_ulong *rx_data_gen_ptr; 900 size_t chunk_start; 901 int insn = 0; 902 903 if (tcg_ctx->data_gen_ptr) { 904 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr); 905 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr; 906 data_size = gen_code_size - code_size; 907 } else { 908 rx_data_gen_ptr = 0; 909 code_size = gen_code_size; 910 data_size = 0; 911 } 912 913 /* Dump header and the first instruction */ 914 fprintf(logfile, "OUT: [size=%d]\n", gen_code_size); 915 fprintf(logfile, 916 " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n", 917 tcg_ctx->gen_insn_data[insn][0]); 918 chunk_start = tcg_ctx->gen_insn_end_off[insn]; 919 disas(logfile, tb->tc.ptr, chunk_start); 920 921 /* 922 * Dump each instruction chunk, wrapping up empty chunks into 923 * the next instruction. The whole array is offset so the 924 * first entry is the beginning of the 2nd instruction. 925 */ 926 while (insn < tb->icount) { 927 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn]; 928 if (chunk_end > chunk_start) { 929 fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n", 930 tcg_ctx->gen_insn_data[insn][0]); 931 disas(logfile, tb->tc.ptr + chunk_start, 932 chunk_end - chunk_start); 933 chunk_start = chunk_end; 934 } 935 insn++; 936 } 937 938 if (chunk_start < code_size) { 939 fprintf(logfile, " -- tb slow paths + alignment\n"); 940 disas(logfile, tb->tc.ptr + chunk_start, 941 code_size - chunk_start); 942 } 943 944 /* Finally dump any data we may have after the block */ 945 if (data_size) { 946 int i; 947 fprintf(logfile, " data: [size=%d]\n", data_size); 948 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) { 949 if (sizeof(tcg_target_ulong) == 8) { 950 fprintf(logfile, 951 "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n", 952 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); 953 } else if (sizeof(tcg_target_ulong) == 4) { 954 fprintf(logfile, 955 "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n", 956 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); 957 } else { 958 qemu_build_not_reached(); 959 } 960 } 961 } 962 fprintf(logfile, "\n"); 963 qemu_log_unlock(logfile); 964 } 965 } 966 #endif 967 968 qatomic_set(&tcg_ctx->code_gen_ptr, (void *) 969 ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, 970 CODE_GEN_ALIGN)); 971 972 /* init jump list */ 973 qemu_spin_init(&tb->jmp_lock); 974 tb->jmp_list_head = (uintptr_t)NULL; 975 tb->jmp_list_next[0] = (uintptr_t)NULL; 976 tb->jmp_list_next[1] = (uintptr_t)NULL; 977 tb->jmp_dest[0] = (uintptr_t)NULL; 978 tb->jmp_dest[1] = (uintptr_t)NULL; 979 980 /* init original jump addresses which have been set during tcg_gen_code() */ 981 if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { 982 tb_reset_jump(tb, 0); 983 } 984 if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { 985 tb_reset_jump(tb, 1); 986 } 987 988 /* 989 * If the TB is not associated with a physical RAM page then it must be 990 * a temporary one-insn TB, and we have nothing left to do. Return early 991 * before attempting to link to other TBs or add to the lookup table. 992 */ 993 if (tb_page_addr0(tb) == -1) { 994 return tb; 995 } 996 997 /* 998 * Insert TB into the corresponding region tree before publishing it 999 * through QHT. Otherwise rewinding happened in the TB might fail to 1000 * lookup itself using host PC. 1001 */ 1002 tcg_tb_insert(tb); 1003 1004 /* 1005 * No explicit memory barrier is required -- tb_link_page() makes the 1006 * TB visible in a consistent state. 1007 */ 1008 existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb)); 1009 /* if the TB already exists, discard what we just translated */ 1010 if (unlikely(existing_tb != tb)) { 1011 uintptr_t orig_aligned = (uintptr_t)gen_code_buf; 1012 1013 orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize); 1014 qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned); 1015 tcg_tb_remove(tb); 1016 return existing_tb; 1017 } 1018 return tb; 1019 } 1020 1021 /* user-mode: call with mmap_lock held */ 1022 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) 1023 { 1024 TranslationBlock *tb; 1025 1026 assert_memory_lock(); 1027 1028 tb = tcg_tb_lookup(retaddr); 1029 if (tb) { 1030 /* We can use retranslation to find the PC. */ 1031 cpu_restore_state_from_tb(cpu, tb, retaddr, true); 1032 tb_phys_invalidate(tb, -1); 1033 } else { 1034 /* The exception probably happened in a helper. The CPU state should 1035 have been saved before calling it. Fetch the PC from there. */ 1036 CPUArchState *env = cpu->env_ptr; 1037 target_ulong pc, cs_base; 1038 tb_page_addr_t addr; 1039 uint32_t flags; 1040 1041 cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); 1042 addr = get_page_addr_code(env, pc); 1043 if (addr != -1) { 1044 tb_invalidate_phys_range(addr, addr + 1); 1045 } 1046 } 1047 } 1048 1049 #ifndef CONFIG_USER_ONLY 1050 /* 1051 * In deterministic execution mode, instructions doing device I/Os 1052 * must be at the end of the TB. 1053 * 1054 * Called by softmmu_template.h, with iothread mutex not held. 1055 */ 1056 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) 1057 { 1058 TranslationBlock *tb; 1059 CPUClass *cc; 1060 uint32_t n; 1061 1062 tb = tcg_tb_lookup(retaddr); 1063 if (!tb) { 1064 cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", 1065 (void *)retaddr); 1066 } 1067 cpu_restore_state_from_tb(cpu, tb, retaddr, true); 1068 1069 /* 1070 * Some guests must re-execute the branch when re-executing a delay 1071 * slot instruction. When this is the case, adjust icount and N 1072 * to account for the re-execution of the branch. 1073 */ 1074 n = 1; 1075 cc = CPU_GET_CLASS(cpu); 1076 if (cc->tcg_ops->io_recompile_replay_branch && 1077 cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) { 1078 cpu_neg(cpu)->icount_decr.u16.low++; 1079 n = 2; 1080 } 1081 1082 /* 1083 * Exit the loop and potentially generate a new TB executing the 1084 * just the I/O insns. We also limit instrumentation to memory 1085 * operations only (which execute after completion) so we don't 1086 * double instrument the instruction. 1087 */ 1088 cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; 1089 1090 if (qemu_loglevel_mask(CPU_LOG_EXEC)) { 1091 target_ulong pc = log_pc(cpu, tb); 1092 if (qemu_log_in_addr_range(pc)) { 1093 qemu_log("cpu_io_recompile: rewound execution of TB to " 1094 TARGET_FMT_lx "\n", pc); 1095 } 1096 } 1097 1098 cpu_loop_exit_noexc(cpu); 1099 } 1100 1101 static void print_qht_statistics(struct qht_stats hst, GString *buf) 1102 { 1103 uint32_t hgram_opts; 1104 size_t hgram_bins; 1105 char *hgram; 1106 1107 if (!hst.head_buckets) { 1108 return; 1109 } 1110 g_string_append_printf(buf, "TB hash buckets %zu/%zu " 1111 "(%0.2f%% head buckets used)\n", 1112 hst.used_head_buckets, hst.head_buckets, 1113 (double)hst.used_head_buckets / 1114 hst.head_buckets * 100); 1115 1116 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; 1117 hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; 1118 if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { 1119 hgram_opts |= QDIST_PR_NODECIMAL; 1120 } 1121 hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); 1122 g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " 1123 "Histogram: %s\n", 1124 qdist_avg(&hst.occupancy) * 100, hgram); 1125 g_free(hgram); 1126 1127 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; 1128 hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); 1129 if (hgram_bins > 10) { 1130 hgram_bins = 10; 1131 } else { 1132 hgram_bins = 0; 1133 hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; 1134 } 1135 hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); 1136 g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " 1137 "Histogram: %s\n", 1138 qdist_avg(&hst.chain), hgram); 1139 g_free(hgram); 1140 } 1141 1142 struct tb_tree_stats { 1143 size_t nb_tbs; 1144 size_t host_size; 1145 size_t target_size; 1146 size_t max_target_size; 1147 size_t direct_jmp_count; 1148 size_t direct_jmp2_count; 1149 size_t cross_page; 1150 }; 1151 1152 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) 1153 { 1154 const TranslationBlock *tb = value; 1155 struct tb_tree_stats *tst = data; 1156 1157 tst->nb_tbs++; 1158 tst->host_size += tb->tc.size; 1159 tst->target_size += tb->size; 1160 if (tb->size > tst->max_target_size) { 1161 tst->max_target_size = tb->size; 1162 } 1163 if (tb_page_addr1(tb) != -1) { 1164 tst->cross_page++; 1165 } 1166 if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { 1167 tst->direct_jmp_count++; 1168 if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { 1169 tst->direct_jmp2_count++; 1170 } 1171 } 1172 return false; 1173 } 1174 1175 void dump_exec_info(GString *buf) 1176 { 1177 struct tb_tree_stats tst = {}; 1178 struct qht_stats hst; 1179 size_t nb_tbs, flush_full, flush_part, flush_elide; 1180 1181 tcg_tb_foreach(tb_tree_stats_iter, &tst); 1182 nb_tbs = tst.nb_tbs; 1183 /* XXX: avoid using doubles ? */ 1184 g_string_append_printf(buf, "Translation buffer state:\n"); 1185 /* 1186 * Report total code size including the padding and TB structs; 1187 * otherwise users might think "-accel tcg,tb-size" is not honoured. 1188 * For avg host size we use the precise numbers from tb_tree_stats though. 1189 */ 1190 g_string_append_printf(buf, "gen code size %zu/%zu\n", 1191 tcg_code_size(), tcg_code_capacity()); 1192 g_string_append_printf(buf, "TB count %zu\n", nb_tbs); 1193 g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", 1194 nb_tbs ? tst.target_size / nb_tbs : 0, 1195 tst.max_target_size); 1196 g_string_append_printf(buf, "TB avg host size %zu bytes " 1197 "(expansion ratio: %0.1f)\n", 1198 nb_tbs ? tst.host_size / nb_tbs : 0, 1199 tst.target_size ? 1200 (double)tst.host_size / tst.target_size : 0); 1201 g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", 1202 tst.cross_page, 1203 nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); 1204 g_string_append_printf(buf, "direct jump count %zu (%zu%%) " 1205 "(2 jumps=%zu %zu%%)\n", 1206 tst.direct_jmp_count, 1207 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, 1208 tst.direct_jmp2_count, 1209 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); 1210 1211 qht_statistics_init(&tb_ctx.htable, &hst); 1212 print_qht_statistics(hst, buf); 1213 qht_statistics_destroy(&hst); 1214 1215 g_string_append_printf(buf, "\nStatistics:\n"); 1216 g_string_append_printf(buf, "TB flush count %u\n", 1217 qatomic_read(&tb_ctx.tb_flush_count)); 1218 g_string_append_printf(buf, "TB invalidate count %u\n", 1219 qatomic_read(&tb_ctx.tb_phys_invalidate_count)); 1220 1221 tlb_flush_counts(&flush_full, &flush_part, &flush_elide); 1222 g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); 1223 g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); 1224 g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); 1225 tcg_dump_info(buf); 1226 } 1227 1228 #else /* CONFIG_USER_ONLY */ 1229 1230 void cpu_interrupt(CPUState *cpu, int mask) 1231 { 1232 g_assert(qemu_mutex_iothread_locked()); 1233 cpu->interrupt_request |= mask; 1234 qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1); 1235 } 1236 1237 /* 1238 * Walks guest process memory "regions" one by one 1239 * and calls callback function 'fn' for each region. 1240 */ 1241 struct walk_memory_regions_data { 1242 walk_memory_regions_fn fn; 1243 void *priv; 1244 target_ulong start; 1245 int prot; 1246 }; 1247 1248 static int walk_memory_regions_end(struct walk_memory_regions_data *data, 1249 target_ulong end, int new_prot) 1250 { 1251 if (data->start != -1u) { 1252 int rc = data->fn(data->priv, data->start, end, data->prot); 1253 if (rc != 0) { 1254 return rc; 1255 } 1256 } 1257 1258 data->start = (new_prot ? end : -1u); 1259 data->prot = new_prot; 1260 1261 return 0; 1262 } 1263 1264 static int walk_memory_regions_1(struct walk_memory_regions_data *data, 1265 target_ulong base, int level, void **lp) 1266 { 1267 target_ulong pa; 1268 int i, rc; 1269 1270 if (*lp == NULL) { 1271 return walk_memory_regions_end(data, base, 0); 1272 } 1273 1274 if (level == 0) { 1275 PageDesc *pd = *lp; 1276 1277 for (i = 0; i < V_L2_SIZE; ++i) { 1278 int prot = pd[i].flags; 1279 1280 pa = base | (i << TARGET_PAGE_BITS); 1281 if (prot != data->prot) { 1282 rc = walk_memory_regions_end(data, pa, prot); 1283 if (rc != 0) { 1284 return rc; 1285 } 1286 } 1287 } 1288 } else { 1289 void **pp = *lp; 1290 1291 for (i = 0; i < V_L2_SIZE; ++i) { 1292 pa = base | ((target_ulong)i << 1293 (TARGET_PAGE_BITS + V_L2_BITS * level)); 1294 rc = walk_memory_regions_1(data, pa, level - 1, pp + i); 1295 if (rc != 0) { 1296 return rc; 1297 } 1298 } 1299 } 1300 1301 return 0; 1302 } 1303 1304 int walk_memory_regions(void *priv, walk_memory_regions_fn fn) 1305 { 1306 struct walk_memory_regions_data data; 1307 uintptr_t i, l1_sz = v_l1_size; 1308 1309 data.fn = fn; 1310 data.priv = priv; 1311 data.start = -1u; 1312 data.prot = 0; 1313 1314 for (i = 0; i < l1_sz; i++) { 1315 target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS); 1316 int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i); 1317 if (rc != 0) { 1318 return rc; 1319 } 1320 } 1321 1322 return walk_memory_regions_end(&data, 0, 0); 1323 } 1324 1325 static int dump_region(void *priv, target_ulong start, 1326 target_ulong end, unsigned long prot) 1327 { 1328 FILE *f = (FILE *)priv; 1329 1330 (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx 1331 " "TARGET_FMT_lx" %c%c%c\n", 1332 start, end, end - start, 1333 ((prot & PAGE_READ) ? 'r' : '-'), 1334 ((prot & PAGE_WRITE) ? 'w' : '-'), 1335 ((prot & PAGE_EXEC) ? 'x' : '-')); 1336 1337 return 0; 1338 } 1339 1340 /* dump memory mappings */ 1341 void page_dump(FILE *f) 1342 { 1343 const int length = sizeof(target_ulong) * 2; 1344 (void) fprintf(f, "%-*s %-*s %-*s %s\n", 1345 length, "start", length, "end", length, "size", "prot"); 1346 walk_memory_regions(f, dump_region); 1347 } 1348 1349 int page_get_flags(target_ulong address) 1350 { 1351 PageDesc *p; 1352 1353 p = page_find(address >> TARGET_PAGE_BITS); 1354 if (!p) { 1355 return 0; 1356 } 1357 return p->flags; 1358 } 1359 1360 /* 1361 * Allow the target to decide if PAGE_TARGET_[12] may be reset. 1362 * By default, they are not kept. 1363 */ 1364 #ifndef PAGE_TARGET_STICKY 1365 #define PAGE_TARGET_STICKY 0 1366 #endif 1367 #define PAGE_STICKY (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY) 1368 1369 /* Modify the flags of a page and invalidate the code if necessary. 1370 The flag PAGE_WRITE_ORG is positioned automatically depending 1371 on PAGE_WRITE. The mmap_lock should already be held. */ 1372 void page_set_flags(target_ulong start, target_ulong end, int flags) 1373 { 1374 target_ulong addr, len; 1375 bool reset, inval_tb = false; 1376 1377 /* This function should never be called with addresses outside the 1378 guest address space. If this assert fires, it probably indicates 1379 a missing call to h2g_valid. */ 1380 assert(end - 1 <= GUEST_ADDR_MAX); 1381 assert(start < end); 1382 /* Only set PAGE_ANON with new mappings. */ 1383 assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET)); 1384 assert_memory_lock(); 1385 1386 start = start & TARGET_PAGE_MASK; 1387 end = TARGET_PAGE_ALIGN(end); 1388 1389 if (flags & PAGE_WRITE) { 1390 flags |= PAGE_WRITE_ORG; 1391 } 1392 reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET); 1393 if (reset) { 1394 page_reset_target_data(start, end); 1395 } 1396 flags &= ~PAGE_RESET; 1397 1398 for (addr = start, len = end - start; 1399 len != 0; 1400 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { 1401 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true); 1402 1403 /* 1404 * If the page was executable, but is reset, or is no longer 1405 * executable, or has become writable, then invalidate any code. 1406 */ 1407 if ((p->flags & PAGE_EXEC) 1408 && (reset || 1409 !(flags & PAGE_EXEC) || 1410 (flags & ~p->flags & PAGE_WRITE))) { 1411 inval_tb = true; 1412 } 1413 /* Using mprotect on a page does not change sticky bits. */ 1414 p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags; 1415 } 1416 1417 if (inval_tb) { 1418 tb_invalidate_phys_range(start, end); 1419 } 1420 } 1421 1422 int page_check_range(target_ulong start, target_ulong len, int flags) 1423 { 1424 PageDesc *p; 1425 target_ulong end; 1426 target_ulong addr; 1427 1428 /* This function should never be called with addresses outside the 1429 guest address space. If this assert fires, it probably indicates 1430 a missing call to h2g_valid. */ 1431 if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) { 1432 assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS)); 1433 } 1434 1435 if (len == 0) { 1436 return 0; 1437 } 1438 if (start + len - 1 < start) { 1439 /* We've wrapped around. */ 1440 return -1; 1441 } 1442 1443 /* must do before we loose bits in the next step */ 1444 end = TARGET_PAGE_ALIGN(start + len); 1445 start = start & TARGET_PAGE_MASK; 1446 1447 for (addr = start, len = end - start; 1448 len != 0; 1449 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { 1450 p = page_find(addr >> TARGET_PAGE_BITS); 1451 if (!p) { 1452 return -1; 1453 } 1454 if (!(p->flags & PAGE_VALID)) { 1455 return -1; 1456 } 1457 1458 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) { 1459 return -1; 1460 } 1461 if (flags & PAGE_WRITE) { 1462 if (!(p->flags & PAGE_WRITE_ORG)) { 1463 return -1; 1464 } 1465 /* unprotect the page if it was put read-only because it 1466 contains translated code */ 1467 if (!(p->flags & PAGE_WRITE)) { 1468 if (!page_unprotect(addr, 0)) { 1469 return -1; 1470 } 1471 } 1472 } 1473 } 1474 return 0; 1475 } 1476 1477 void page_protect(tb_page_addr_t page_addr) 1478 { 1479 target_ulong addr; 1480 PageDesc *p; 1481 int prot; 1482 1483 p = page_find(page_addr >> TARGET_PAGE_BITS); 1484 if (p && (p->flags & PAGE_WRITE)) { 1485 /* 1486 * Force the host page as non writable (writes will have a page fault + 1487 * mprotect overhead). 1488 */ 1489 page_addr &= qemu_host_page_mask; 1490 prot = 0; 1491 for (addr = page_addr; addr < page_addr + qemu_host_page_size; 1492 addr += TARGET_PAGE_SIZE) { 1493 1494 p = page_find(addr >> TARGET_PAGE_BITS); 1495 if (!p) { 1496 continue; 1497 } 1498 prot |= p->flags; 1499 p->flags &= ~PAGE_WRITE; 1500 } 1501 mprotect(g2h_untagged(page_addr), qemu_host_page_size, 1502 (prot & PAGE_BITS) & ~PAGE_WRITE); 1503 } 1504 } 1505 1506 /* called from signal handler: invalidate the code and unprotect the 1507 * page. Return 0 if the fault was not handled, 1 if it was handled, 1508 * and 2 if it was handled but the caller must cause the TB to be 1509 * immediately exited. (We can only return 2 if the 'pc' argument is 1510 * non-zero.) 1511 */ 1512 int page_unprotect(target_ulong address, uintptr_t pc) 1513 { 1514 unsigned int prot; 1515 bool current_tb_invalidated; 1516 PageDesc *p; 1517 target_ulong host_start, host_end, addr; 1518 1519 /* Technically this isn't safe inside a signal handler. However we 1520 know this only ever happens in a synchronous SEGV handler, so in 1521 practice it seems to be ok. */ 1522 mmap_lock(); 1523 1524 p = page_find(address >> TARGET_PAGE_BITS); 1525 if (!p) { 1526 mmap_unlock(); 1527 return 0; 1528 } 1529 1530 /* if the page was really writable, then we change its 1531 protection back to writable */ 1532 if (p->flags & PAGE_WRITE_ORG) { 1533 current_tb_invalidated = false; 1534 if (p->flags & PAGE_WRITE) { 1535 /* If the page is actually marked WRITE then assume this is because 1536 * this thread raced with another one which got here first and 1537 * set the page to PAGE_WRITE and did the TB invalidate for us. 1538 */ 1539 #ifdef TARGET_HAS_PRECISE_SMC 1540 TranslationBlock *current_tb = tcg_tb_lookup(pc); 1541 if (current_tb) { 1542 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID; 1543 } 1544 #endif 1545 } else { 1546 host_start = address & qemu_host_page_mask; 1547 host_end = host_start + qemu_host_page_size; 1548 1549 prot = 0; 1550 for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) { 1551 p = page_find(addr >> TARGET_PAGE_BITS); 1552 p->flags |= PAGE_WRITE; 1553 prot |= p->flags; 1554 1555 /* and since the content will be modified, we must invalidate 1556 the corresponding translated code. */ 1557 current_tb_invalidated |= 1558 tb_invalidate_phys_page_unwind(addr, pc); 1559 } 1560 mprotect((void *)g2h_untagged(host_start), qemu_host_page_size, 1561 prot & PAGE_BITS); 1562 } 1563 mmap_unlock(); 1564 /* If current TB was invalidated return to main loop */ 1565 return current_tb_invalidated ? 2 : 1; 1566 } 1567 mmap_unlock(); 1568 return 0; 1569 } 1570 #endif /* CONFIG_USER_ONLY */ 1571 1572 /* 1573 * Called by generic code at e.g. cpu reset after cpu creation, 1574 * therefore we must be prepared to allocate the jump cache. 1575 */ 1576 void tcg_flush_jmp_cache(CPUState *cpu) 1577 { 1578 CPUJumpCache *jc = cpu->tb_jmp_cache; 1579 1580 if (likely(jc)) { 1581 for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { 1582 qatomic_set(&jc->array[i].tb, NULL); 1583 } 1584 } else { 1585 /* This should happen once during realize, and thus never race. */ 1586 jc = g_new0(CPUJumpCache, 1); 1587 jc = qatomic_xchg(&cpu->tb_jmp_cache, jc); 1588 assert(jc == NULL); 1589 } 1590 } 1591 1592 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */ 1593 void tcg_flush_softmmu_tlb(CPUState *cs) 1594 { 1595 #ifdef CONFIG_SOFTMMU 1596 tlb_flush(cs); 1597 #endif 1598 } 1599