/*
 * Host code generation
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"

#define NO_CPU_IO_DEFS
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else
#include "exec/ram_addr.h"
#endif

#include "exec/cputlb.h"
#include "exec/translate-all.h"
#include "exec/translator.h"
#include "qemu/bitmap.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "qemu/cacheinfo.h"
#include "exec/log.h"
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "qapi/error.h"
#include "hw/core/tcg-cpu-ops.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal.h"

/* make various TB consistency checks */

/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page
 * @locked: whether the page is locked
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    tb_page_addr_t index;
    bool locked;
};

/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index
 * @max:    Pointer to the page in @tree with the highest page index
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    GTree *tree;
    struct page_entry *max;
};

/*
 * In system mode we want L1_MAP to be based on ram offsets,
 * while in user mode we want it to be based on virtual addresses.
 *
 * TODO: For user mode, see the caveat re host vs guest virtual
 * address spaces near GUEST_ADDR_MAX.
 */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
#endif

/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
                  * BITS_PER_BYTE);

/*
 * L1 Mapping properties
 */
int v_l1_size;
int v_l1_shift;
int v_l2_levels;

void *l1_map[V_L1_MAX_SIZE];

TBContext tb_ctx;

static void page_table_config_init(void)
{
    uint32_t v_l1_bits;

    assert(TARGET_PAGE_BITS);
    /* The bits remaining after N lower levels of page tables. */
    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
    if (v_l1_bits < V_L1_MIN_BITS) {
        v_l1_bits += V_L2_BITS;
    }

    v_l1_size = 1 << v_l1_bits;
    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
    v_l2_levels = v_l1_shift / V_L2_BITS - 1;

    assert(v_l1_bits <= V_L1_MAX_BITS);
    assert(v_l1_shift % V_L2_BITS == 0);
    assert(v_l2_levels >= 0);
}

/* Encode VAL as a signed leb128 sequence at P.
   Return P incremented past the encoded value.  */
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
{
    int more, byte;

    do {
        byte = val & 0x7f;
        val >>= 7;
        more = !((val == 0 && (byte & 0x40) == 0)
                 || (val == -1 && (byte & 0x40) != 0));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);

    return p;
}

/* Decode a signed leb128 sequence at *PP; increment *PP past the
   decoded value.  Return the decoded value.  */
static target_long decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    target_long val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        val |= (target_ulong)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
        val |= -(target_ulong)1 << shift;
    }

    *pp = p;
    return val;
}

/* Encode the data collected about the instructions while compiling TB.
   Place the data at BLOCK, and return the number of bytes consumed.

   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
   which come from the target's insn_start data, followed by a uintptr_t
   which comes from the host pc of the end of the code implementing the insn.

   Each line of the table is encoded as sleb128 deltas from the previous
   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
   That is, the first column is seeded with the guest pc, the last column
   with the host pc, and the middle columns with zeros.  */

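/*
 * Illustrative example (added; the numbers are made up, not taken from any
 * particular target): a TB with two insns at guest pcs 0x1000 and 0x1004,
 * whose generated host code ends at offsets 0x40 and 0x7c from tb->tc.ptr,
 * has the logical rows
 *     { 0x1000, 0..., 0x40 }
 *     { 0x1004, 0..., 0x7c }
 * and is stored as the sleb128-encoded deltas
 *     { 0, 0..., 0x40 }      (first row minus the seed)
 *     { 4, 0..., 0x3c }      (second row minus the first)
 */
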
static int encode_search(TranslationBlock *tb, uint8_t *block)
{
    uint8_t *highwater = tcg_ctx->code_gen_highwater;
    uint8_t *p = block;
    int i, j, n;

    for (i = 0, n = tb->icount; i < n; ++i) {
        target_ulong prev;

        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            if (i == 0) {
                prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
            } else {
                prev = tcg_ctx->gen_insn_data[i - 1][j];
            }
            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
        }
        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);

        /* Test for (pending) buffer overflow.  The assumption is that any
           one row beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           encoding a row without having to check during encoding.  */
        if (unlikely(p > highwater)) {
            return -1;
        }
    }

    return p - block;
}

static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
                                   uint64_t *data)
{
    uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
    const uint8_t *p = tb->tc.ptr + tb->tc.size;
    int i, j, num_insns = tb->icount;

    host_pc -= GETPC_ADJ;

    if (host_pc < iter_pc) {
        return -1;
    }

    memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
    if (!TARGET_TB_PCREL) {
        data[0] = tb_pc(tb);
    }

    /*
     * Reconstruct the stored insn data while looking for the point
     * at which the end of the insn exceeds host_pc.
     */
    for (i = 0; i < num_insns; ++i) {
        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            data[j] += decode_sleb128(&p);
        }
        iter_pc += decode_sleb128(&p);
        if (iter_pc > host_pc) {
            return num_insns - i;
        }
    }
    return -1;
}

/*
 * The cpu state corresponding to 'host_pc' is restored in
 * preparation for exiting the TB.
 */
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                               uintptr_t host_pc)
{
    uint64_t data[TARGET_INSN_START_WORDS];
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti = profile_getclock();
#endif
    int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);

    if (insns_left < 0) {
        return;
    }

    if (tb_cflags(tb) & CF_USE_ICOUNT) {
        assert(icount_enabled());
        /*
         * Reset the cycle counter to the start of the block and
         * shift it to the number of actually executed instructions.
         */
        cpu_neg(cpu)->icount_decr.u16.low += insns_left;
    }

    cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->restore_time,
                prof->restore_time + profile_getclock() - ti);
    qatomic_set(&prof->restore_count, prof->restore_count + 1);
#endif
}

bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
{
    /*
     * The host_pc has to be in the rx region of the code buffer.
     * If it is not we will not be able to resolve it here.
     * The two cases where host_pc will not be correct are:
     *
     *  - fault during translation (instruction fetch)
     *  - fault from helper (not using GETPC() macro)
     *
     * Either way we need to return early as we can't resolve it here.
     */
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            cpu_restore_state_from_tb(cpu, tb, host_pc);
            return true;
        }
    }
    return false;
}

bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
{
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
        }
    }
    return false;
}

void page_init(void)
{
    page_size_init();
    page_table_config_init();

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}

PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));

    /* Level 2..N-1.  */
    for (i = v_l2_levels; i > 0; i--) {
        void **p = qatomic_rcu_read(lp);

        if (p == NULL) {
            void *existing;

            if (!alloc) {
                return NULL;
            }
            p = g_new0(void *, V_L2_SIZE);
            existing = qatomic_cmpxchg(lp, NULL, p);
            if (unlikely(existing)) {
                g_free(p);
                p = existing;
            }
        }

        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
    }

    pd = qatomic_rcu_read(lp);
    if (pd == NULL) {
        void *existing;

        if (!alloc) {
            return NULL;
        }
        pd = g_new0(PageDesc, V_L2_SIZE);
#ifndef CONFIG_USER_ONLY
        {
            int i;

            for (i = 0; i < V_L2_SIZE; i++) {
                qemu_spin_init(&pd[i].lock);
            }
        }
#endif
        existing = qatomic_cmpxchg(lp, NULL, pd);
        if (unlikely(existing)) {
#ifndef CONFIG_USER_ONLY
            {
                int i;

                for (i = 0; i < V_L2_SIZE; i++) {
                    qemu_spin_destroy(&pd[i].lock);
                }
            }
#endif
            g_free(pd);
            pd = existing;
        }
    }

    return pd + (index & (V_L2_SIZE - 1));
}

/* In user-mode page locks aren't used; mmap_lock is enough */
#ifdef CONFIG_USER_ONLY
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    return NULL;
}

void page_collection_unlock(struct page_collection *set)
{ }
#else /* !CONFIG_USER_ONLY */

#ifdef CONFIG_DEBUG_TCG

static __thread GHashTable *ht_pages_locked_debug;

static void ht_pages_locked_debug_init(void)
{
    if (ht_pages_locked_debug) {
        return;
    }
    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
}

static bool page_is_locked(const PageDesc *pd)
{
    PageDesc *found;

    ht_pages_locked_debug_init();
    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
    return !!found;
}

static void page_lock__debug(PageDesc *pd)
{
    ht_pages_locked_debug_init();
    g_assert(!page_is_locked(pd));
    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
}

static void page_unlock__debug(const PageDesc *pd)
{
    bool removed;

    ht_pages_locked_debug_init();
    g_assert(page_is_locked(pd));
    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
    g_assert(removed);
}

void do_assert_page_locked(const PageDesc *pd, const char *file, int line)
{
    if (unlikely(!page_is_locked(pd))) {
        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
                     pd, file, line);
        abort();
    }
}

void assert_no_pages_locked(void)
{
    ht_pages_locked_debug_init();
    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
}

#else /* !CONFIG_DEBUG_TCG */

static inline void page_lock__debug(const PageDesc *pd) { }
static inline void page_unlock__debug(const PageDesc *pd) { }

#endif /* CONFIG_DEBUG_TCG */

void page_lock(PageDesc *pd)
{
    page_lock__debug(pd);
    qemu_spin_lock(&pd->lock);
}

void page_unlock(PageDesc *pd)
{
    qemu_spin_unlock(&pd->lock);
    page_unlock__debug(pd);
}

static inline struct page_entry *
page_entry_new(PageDesc *pd, tb_page_addr_t index)
{
    struct page_entry *pe = g_malloc(sizeof(*pe));

    pe->index = index;
    pe->pd = pd;
    pe->locked = false;
    return pe;
}

static void page_entry_destroy(gpointer p)
{
    struct page_entry *pe = p;

    g_assert(pe->locked);
    page_unlock(pe->pd);
    g_free(pe);
}

/* returns false on success */
static bool page_entry_trylock(struct page_entry *pe)
{
    bool busy;

    busy = qemu_spin_trylock(&pe->pd->lock);
    if (!busy) {
        g_assert(!pe->locked);
        pe->locked = true;
        page_lock__debug(pe->pd);
    }
    return busy;
}

static void do_page_entry_lock(struct page_entry *pe)
{
    page_lock(pe->pd);
    g_assert(!pe->locked);
    pe->locked = true;
}

static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    do_page_entry_lock(pe);
    return FALSE;
}

static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    if (pe->locked) {
        pe->locked = false;
        page_unlock(pe->pd);
    }
    return FALSE;
}

/*
 * Trylock a page, and if successful, add the page to a collection.
 * Returns true ("busy") if the page could not be locked; false otherwise.
 */
static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
{
    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
    struct page_entry *pe;
    PageDesc *pd;

    pe = g_tree_lookup(set->tree, &index);
    if (pe) {
        return false;
    }

    pd = page_find(index);
    if (pd == NULL) {
        return false;
    }

    pe = page_entry_new(pd, index);
    g_tree_insert(set->tree, &pe->index, pe);

    /*
     * If this is either (1) the first insertion or (2) a page whose index
     * is higher than any other so far, just lock the page and move on.
     */
    if (set->max == NULL || pe->index > set->max->index) {
        set->max = pe;
        do_page_entry_lock(pe);
        return false;
    }
    /*
     * Try to acquire out-of-order lock; if busy, return busy so that we acquire
     * locks in order.
     */
    return page_entry_trylock(pe);
}

static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
{
    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
    tb_page_addr_t b = *(const tb_page_addr_t *)bp;

    if (a == b) {
        return 0;
    } else if (a < b) {
        return -1;
    }
    return 1;
}

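/*
 * Typical usage of the pair of functions below (sketch only, added here for
 * orientation; the real callers are in the TB maintenance code, e.g. when
 * invalidating the TBs on a range of pages):
 *
 *     struct page_collection *pages = page_collection_lock(start, end);
 *     ... modify or invalidate the TBs intersecting [start, end) ...
 *     page_collection_unlock(pages);
 */
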
/*
 * Lock a range of pages ([@start,@end[) as well as the pages of all
 * intersecting TBs.
 * Locking order: acquire locks in ascending order of page index.
 */
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    struct page_collection *set = g_malloc(sizeof(*set));
    tb_page_addr_t index;
    PageDesc *pd;

    start >>= TARGET_PAGE_BITS;
    end >>= TARGET_PAGE_BITS;
    g_assert(start <= end);

    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
                                page_entry_destroy);
    set->max = NULL;
    assert_no_pages_locked();

 retry:
    g_tree_foreach(set->tree, page_entry_lock, NULL);

    for (index = start; index <= end; index++) {
        TranslationBlock *tb;
        int n;

        pd = page_find(index);
        if (pd == NULL) {
            continue;
        }
        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
            g_tree_foreach(set->tree, page_entry_unlock, NULL);
            goto retry;
        }
        assert_page_locked(pd);
        PAGE_FOR_EACH_TB(pd, tb, n) {
            if (page_trylock_add(set, tb_page_addr0(tb)) ||
                (tb_page_addr1(tb) != -1 &&
                 page_trylock_add(set, tb_page_addr1(tb)))) {
                /* drop all locks, and reacquire in order */
                g_tree_foreach(set->tree, page_entry_unlock, NULL);
                goto retry;
            }
        }
    }
    return set;
}

void page_collection_unlock(struct page_collection *set)
{
    /* entries are unlocked and freed via page_entry_destroy */
    g_tree_destroy(set->tree);
    g_free(set);
}

#endif /* !CONFIG_USER_ONLY */

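/*
 * Added summary of tb_gen_code() below, which does the actual translation:
 *  1. allocate a TranslationBlock from the current TCG region;
 *  2. fill in pc/cs_base/flags/cflags and translate the guest code into
 *     TCG ops with gen_intermediate_code();
 *  3. turn the ops into host code with tcg_gen_code(), restarting with a
 *     fresh buffer or fewer insns if the generated code does not fit;
 *  4. append the unwind ("search") data with encode_search();
 *  5. link the TB into the region tree and the physical-page lists, or
 *     discard it if an identical TB was created concurrently.
 */
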
/* Called with mmap_lock held for user mode emulation.  */
TranslationBlock *tb_gen_code(CPUState *cpu,
                              target_ulong pc, target_ulong cs_base,
                              uint32_t flags, int cflags)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb, *existing_tb;
    tb_page_addr_t phys_pc;
    tcg_insn_unit *gen_code_buf;
    int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti;
#endif
    void *host_pc;

    assert_memory_lock();
    qemu_thread_jit_write();

    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);

    if (phys_pc == -1) {
        /* Generate a one-shot TB with 1 insn in it */
        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
    }

    max_insns = cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = TCG_MAX_INSNS;
    }
    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);

 buffer_overflow:
    tb = tcg_tb_alloc(tcg_ctx);
    if (unlikely(!tb)) {
        /* flush must be done */
        tb_flush(cpu);
        mmap_unlock();
        /* Make the execution loop process the flush as soon as possible.  */
        cpu->exception_index = EXCP_INTERRUPT;
        cpu_loop_exit(cpu);
    }

    gen_code_buf = tcg_ctx->code_gen_ptr;
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
#if !TARGET_TB_PCREL
    tb->pc = pc;
#endif
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    tb->trace_vcpu_dstate = *cpu->trace_dstate;
    tb_set_page_addr0(tb, phys_pc);
    tb_set_page_addr1(tb, -1);
    tcg_ctx->tb_cflags = cflags;
 tb_overflow:

#ifdef CONFIG_PROFILER
    /* includes aborted translations because of exceptions */
    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
    ti = profile_getclock();
#endif

    gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(gen_code_size != 0)) {
        goto error_return;
    }

    tcg_func_start(tcg_ctx);

    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(cpu, tb, max_insns, pc, host_pc);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    max_insns = tb->icount;

    trace_translate_block(tb, pc, tb->tc.ptr);

    /* generate machine code */
    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
    if (TCG_TARGET_HAS_direct_jump) {
        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
        tcg_ctx->tb_jmp_target_addr = NULL;
    } else {
        tcg_ctx->tb_jmp_insn_offset = NULL;
        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->tb_count, prof->tb_count + 1);
    qatomic_set(&prof->interm_time,
                prof->interm_time + profile_getclock() - ti);
    ti = profile_getclock();
#endif

    gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
    if (unlikely(gen_code_size < 0)) {
 error_return:
        switch (gen_code_size) {
        case -1:
            /*
             * Overflow of code_gen_buffer, or the current slice of it.
             *
             * TODO: We don't need to re-do gen_intermediate_code, nor
             * should we re-do the tcg optimization currently hidden
             * inside tcg_gen_code.  All that should be required is to
             * flush the TBs, allocate a new TB, re-initialize it per
             * above, and re-do the actual code generation.
             */
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation for "
                          "code_gen_buffer overflow\n");
            goto buffer_overflow;

        case -2:
            /*
             * The code generated for the TranslationBlock is too large.
             * The maximum size allowed by the unwind info is 64k.
             * There may be stricter constraints from relocations
             * in the tcg backend.
             *
             * Try again with half as many insns as we attempted this time.
             * If a single insn overflows, there's a bug somewhere...
             */
            assert(max_insns > 1);
            max_insns /= 2;
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation with "
                          "smaller translation block (max %d insns)\n",
                          max_insns);
            goto tb_overflow;

        default:
            g_assert_not_reached();
        }
    }
    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    if (unlikely(search_size < 0)) {
        goto buffer_overflow;
    }
    tb->tc.size = gen_code_size;

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
        qemu_log_in_addr_range(pc)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            int code_size, data_size;
            const tcg_target_ulong *rx_data_gen_ptr;
            size_t chunk_start;
            int insn = 0;

            if (tcg_ctx->data_gen_ptr) {
                rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
                code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
                data_size = gen_code_size - code_size;
            } else {
                rx_data_gen_ptr = 0;
                code_size = gen_code_size;
                data_size = 0;
            }

            /* Dump header and the first instruction */
            fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
            fprintf(logfile,
                    " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
                    tcg_ctx->gen_insn_data[insn][0]);
            chunk_start = tcg_ctx->gen_insn_end_off[insn];
            disas(logfile, tb->tc.ptr, chunk_start);

            /*
             * Dump each instruction chunk, wrapping up empty chunks into
             * the next instruction.  The whole array is offset so the
             * first entry is the beginning of the 2nd instruction.
             */
            while (insn < tb->icount) {
                size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
                if (chunk_end > chunk_start) {
                    fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
                            tcg_ctx->gen_insn_data[insn][0]);
                    disas(logfile, tb->tc.ptr + chunk_start,
                          chunk_end - chunk_start);
                    chunk_start = chunk_end;
                }
                insn++;
            }

            if (chunk_start < code_size) {
                fprintf(logfile, " -- tb slow paths + alignment\n");
                disas(logfile, tb->tc.ptr + chunk_start,
                      code_size - chunk_start);
            }

            /* Finally dump any data we may have after the block */
            if (data_size) {
                int i;
                fprintf(logfile, " data: [size=%d]\n", data_size);
                for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else if (sizeof(tcg_target_ulong) == 4) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else {
                        qemu_build_not_reached();
                    }
                }
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
                 CODE_GEN_ALIGN));

    /* init jump list */
    qemu_spin_init(&tb->jmp_lock);
    tb->jmp_list_head = (uintptr_t)NULL;
    tb->jmp_list_next[0] = (uintptr_t)NULL;
    tb->jmp_list_next[1] = (uintptr_t)NULL;
    tb->jmp_dest[0] = (uintptr_t)NULL;
    tb->jmp_dest[1] = (uintptr_t)NULL;

    /* init original jump addresses which have been set during tcg_gen_code() */
    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 0);
    }
    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 1);
    }

    /*
     * If the TB is not associated with a physical RAM page then it must be
     * a temporary one-insn TB, and we have nothing left to do.  Return early
     * before attempting to link to other TBs or add to the lookup table.
     */
    if (tb_page_addr0(tb) == -1) {
        return tb;
    }

    /*
     * Insert TB into the corresponding region tree before publishing it
     * through QHT.  Otherwise rewinding happening in the TB might fail to
     * look itself up using the host PC.
     */
    tcg_tb_insert(tb);

    /*
     * No explicit memory barrier is required -- tb_link_page() makes the
     * TB visible in a consistent state.
     */
    existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
    /* if the TB already exists, discard what we just translated */
    if (unlikely(existing_tb != tb)) {
        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;

        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
        tcg_tb_remove(tb);
        return existing_tb;
    }
    return tb;
}

/* user-mode: call with mmap_lock held */
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;

    assert_memory_lock();

    tb = tcg_tb_lookup(retaddr);
    if (tb) {
        /* We can use retranslation to find the PC.  */
        cpu_restore_state_from_tb(cpu, tb, retaddr);
        tb_phys_invalidate(tb, -1);
    } else {
        /* The exception probably happened in a helper.  The CPU state should
           have been saved before calling it. Fetch the PC from there.  */
        CPUArchState *env = cpu->env_ptr;
        target_ulong pc, cs_base;
        tb_page_addr_t addr;
        uint32_t flags;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
        addr = get_page_addr_code(env, pc);
        if (addr != -1) {
            tb_invalidate_phys_range(addr, addr + 1);
        }
    }
}

#ifndef CONFIG_USER_ONLY
/*
 * In deterministic execution mode, instructions doing device I/Os
 * must be at the end of the TB.
 *
 * Called by softmmu_template.h, with iothread mutex not held.
 */
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;
    CPUClass *cc;
    uint32_t n;

    tb = tcg_tb_lookup(retaddr);
    if (!tb) {
        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    cpu_restore_state_from_tb(cpu, tb, retaddr);

    /*
     * Some guests must re-execute the branch when re-executing a delay
     * slot instruction.  When this is the case, adjust icount and N
     * to account for the re-execution of the branch.
     */
    n = 1;
    cc = CPU_GET_CLASS(cpu);
    if (cc->tcg_ops->io_recompile_replay_branch &&
        cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
        cpu_neg(cpu)->icount_decr.u16.low++;
        n = 2;
    }

    /*
     * Exit the loop and potentially generate a new TB executing just
     * the I/O insns.  We also limit instrumentation to memory
     * operations only (which execute after completion) so we don't
     * double instrument the instruction.
     */
    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;

    if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
        target_ulong pc = log_pc(cpu, tb);
        if (qemu_log_in_addr_range(pc)) {
            qemu_log("cpu_io_recompile: rewound execution of TB to "
                     TARGET_FMT_lx "\n", pc);
        }
    }

    cpu_loop_exit_noexc(cpu);
}

static void print_qht_statistics(struct qht_stats hst, GString *buf)
{
    uint32_t hgram_opts;
    size_t hgram_bins;
    char *hgram;

    if (!hst.head_buckets) {
        return;
    }
    g_string_append_printf(buf, "TB hash buckets %zu/%zu "
                           "(%0.2f%% head buckets used)\n",
                           hst.used_head_buckets, hst.head_buckets,
                           (double)hst.used_head_buckets /
                           hst.head_buckets * 100);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
        hgram_opts |= QDIST_PR_NODECIMAL;
    }
    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
    g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.occupancy) * 100, hgram);
    g_free(hgram);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
    if (hgram_bins > 10) {
        hgram_bins = 10;
    } else {
        hgram_bins = 0;
        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
    }
    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
    g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.chain), hgram);
    g_free(hgram);
}

struct tb_tree_stats {
    size_t nb_tbs;
    size_t host_size;
    size_t target_size;
    size_t max_target_size;
    size_t direct_jmp_count;
    size_t direct_jmp2_count;
    size_t cross_page;
};

static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
{
    const TranslationBlock *tb = value;
    struct tb_tree_stats *tst = data;

    tst->nb_tbs++;
    tst->host_size += tb->tc.size;
    tst->target_size += tb->size;
    if (tb->size > tst->max_target_size) {
        tst->max_target_size = tb->size;
    }
    if (tb_page_addr1(tb) != -1) {
        tst->cross_page++;
    }
    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
        tst->direct_jmp_count++;
        if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
            tst->direct_jmp2_count++;
        }
    }
    return false;
}

void dump_exec_info(GString *buf)
{
    struct tb_tree_stats tst = {};
    struct qht_stats hst;
    size_t nb_tbs, flush_full, flush_part, flush_elide;

    tcg_tb_foreach(tb_tree_stats_iter, &tst);
    nb_tbs = tst.nb_tbs;
    /* XXX: avoid using doubles ? */
    g_string_append_printf(buf, "Translation buffer state:\n");
    /*
     * Report total code size including the padding and TB structs;
     * otherwise users might think "-accel tcg,tb-size" is not honoured.
     * For avg host size we use the precise numbers from tb_tree_stats though.
     */
    g_string_append_printf(buf, "gen code size       %zu/%zu\n",
                           tcg_code_size(), tcg_code_capacity());
    g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
    g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
                           nb_tbs ? tst.target_size / nb_tbs : 0,
                           tst.max_target_size);
    g_string_append_printf(buf, "TB avg host size    %zu bytes "
                           "(expansion ratio: %0.1f)\n",
                           nb_tbs ? tst.host_size / nb_tbs : 0,
                           tst.target_size ?
                           (double)tst.host_size / tst.target_size : 0);
    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
                           tst.cross_page,
                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
    g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
                           "(2 jumps=%zu %zu%%)\n",
                           tst.direct_jmp_count,
                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
                           tst.direct_jmp2_count,
                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);

    qht_statistics_init(&tb_ctx.htable, &hst);
    print_qht_statistics(hst, buf);
    qht_statistics_destroy(&hst);

    g_string_append_printf(buf, "\nStatistics:\n");
    g_string_append_printf(buf, "TB flush count      %u\n",
                           qatomic_read(&tb_ctx.tb_flush_count));
    g_string_append_printf(buf, "TB invalidate count %u\n",
                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));

    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
    g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
    g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
    tcg_dump_info(buf);
}

#else /* CONFIG_USER_ONLY */

void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());
    cpu->interrupt_request |= mask;
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}

/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 */
struct walk_memory_regions_data {
    walk_memory_regions_fn fn;
    void *priv;
    target_ulong start;
    int prot;
};

static int walk_memory_regions_end(struct walk_memory_regions_data *data,
                                   target_ulong end, int new_prot)
{
    if (data->start != -1u) {
        int rc = data->fn(data->priv, data->start, end, data->prot);
        if (rc != 0) {
            return rc;
        }
    }

    data->start = (new_prot ? end : -1u);
    data->prot = new_prot;

    return 0;
}

static int walk_memory_regions_1(struct walk_memory_regions_data *data,
                                 target_ulong base, int level, void **lp)
{
    target_ulong pa;
    int i, rc;

    if (*lp == NULL) {
        return walk_memory_regions_end(data, base, 0);
    }

    if (level == 0) {
        PageDesc *pd = *lp;

        for (i = 0; i < V_L2_SIZE; ++i) {
            int prot = pd[i].flags;

            pa = base | (i << TARGET_PAGE_BITS);
            if (prot != data->prot) {
                rc = walk_memory_regions_end(data, pa, prot);
                if (rc != 0) {
                    return rc;
                }
            }
        }
    } else {
        void **pp = *lp;

        for (i = 0; i < V_L2_SIZE; ++i) {
            pa = base | ((target_ulong)i <<
                (TARGET_PAGE_BITS + V_L2_BITS * level));
            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
            if (rc != 0) {
                return rc;
            }
        }
    }

    return 0;
}

int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
{
    struct walk_memory_regions_data data;
    uintptr_t i, l1_sz = v_l1_size;

    data.fn = fn;
    data.priv = priv;
    data.start = -1u;
    data.prot = 0;

    for (i = 0; i < l1_sz; i++) {
        target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
        int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
        if (rc != 0) {
            return rc;
        }
    }

    return walk_memory_regions_end(&data, 0, 0);
}

static int dump_region(void *priv, target_ulong start,
                       target_ulong end, unsigned long prot)
{
    FILE *f = (FILE *)priv;

    (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
                   " "TARGET_FMT_lx" %c%c%c\n",
                   start, end, end - start,
                   ((prot & PAGE_READ) ? 'r' : '-'),
                   ((prot & PAGE_WRITE) ? 'w' : '-'),
                   ((prot & PAGE_EXEC) ? 'x' : '-'));

    return 0;
}

/* dump memory mappings */
void page_dump(FILE *f)
{
    const int length = sizeof(target_ulong) * 2;
    (void) fprintf(f, "%-*s %-*s %-*s %s\n",
                   length, "start", length, "end", length, "size", "prot");
    walk_memory_regions(f, dump_region);
}

int page_get_flags(target_ulong address)
{
    PageDesc *p;

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p) {
        return 0;
    }
    return p->flags;
}

/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)

/* Modify the flags of a page and invalidate the code if necessary.
   The flag PAGE_WRITE_ORG is positioned automatically depending
   on PAGE_WRITE.  The mmap_lock should already be held.  */
void page_set_flags(target_ulong start, target_ulong end, int flags)
{
    target_ulong addr, len;
    bool reset, inval_tb = false;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
    assert(end - 1 <= GUEST_ADDR_MAX);
    assert(start < end);
    /* Only set PAGE_ANON with new mappings. */
    assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
    assert_memory_lock();

    start = start & TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    if (flags & PAGE_WRITE) {
        flags |= PAGE_WRITE_ORG;
    }
    reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
    if (reset) {
        page_reset_target_data(start, end);
    }
    flags &= ~PAGE_RESET;

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);

        /*
         * If the page was executable, but is reset, or is no longer
         * executable, or has become writable, then invalidate any code.
         */
        if ((p->flags & PAGE_EXEC)
            && (reset ||
                !(flags & PAGE_EXEC) ||
                (flags & ~p->flags & PAGE_WRITE))) {
            inval_tb = true;
        }
        /* Using mprotect on a page does not change sticky bits. */
        p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags;
    }

    if (inval_tb) {
        tb_invalidate_phys_range(start, end);
    }
}

int page_check_range(target_ulong start, target_ulong len, int flags)
{
    PageDesc *p;
    target_ulong end;
    target_ulong addr;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
    if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
        assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
    }

    if (len == 0) {
        return 0;
    }
    if (start + len - 1 < start) {
        /* We've wrapped around.  */
        return -1;
    }

    /* must do before we lose bits in the next step */
    end = TARGET_PAGE_ALIGN(start + len);
    start = start & TARGET_PAGE_MASK;

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        p = page_find(addr >> TARGET_PAGE_BITS);
        if (!p) {
            return -1;
        }
        if (!(p->flags & PAGE_VALID)) {
            return -1;
        }

        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
            return -1;
        }
        if (flags & PAGE_WRITE) {
            if (!(p->flags & PAGE_WRITE_ORG)) {
                return -1;
            }
            /* unprotect the page if it was put read-only because it
               contains translated code */
            if (!(p->flags & PAGE_WRITE)) {
                if (!page_unprotect(addr, 0)) {
                    return -1;
                }
            }
        }
    }
    return 0;
}

void page_protect(tb_page_addr_t page_addr)
{
    target_ulong addr;
    PageDesc *p;
    int prot;

    p = page_find(page_addr >> TARGET_PAGE_BITS);
    if (p && (p->flags & PAGE_WRITE)) {
        /*
         * Force the host page as non writable (writes will have a page fault +
         * mprotect overhead).
         */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
             addr += TARGET_PAGE_SIZE) {

            p = page_find(addr >> TARGET_PAGE_BITS);
            if (!p) {
                continue;
            }
            prot |= p->flags;
            p->flags &= ~PAGE_WRITE;
        }
        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
    }
}

/* called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 */
int page_unprotect(target_ulong address, uintptr_t pc)
{
    unsigned int prot;
    bool current_tb_invalidated;
    PageDesc *p;
    target_ulong host_start, host_end, addr;

    /* Technically this isn't safe inside a signal handler.  However we
       know this only ever happens in a synchronous SEGV handler, so in
       practice it seems to be ok.  */
    mmap_lock();

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p) {
        mmap_unlock();
        return 0;
    }

    /* if the page was really writable, then we change its
       protection back to writable */
    if (p->flags & PAGE_WRITE_ORG) {
        current_tb_invalidated = false;
        if (p->flags & PAGE_WRITE) {
            /* If the page is actually marked WRITE then assume this is because
             * this thread raced with another one which got here first and
             * set the page to PAGE_WRITE and did the TB invalidate for us.
             */
#ifdef TARGET_HAS_PRECISE_SMC
            TranslationBlock *current_tb = tcg_tb_lookup(pc);
            if (current_tb) {
                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
            }
#endif
        } else {
            host_start = address & qemu_host_page_mask;
            host_end = host_start + qemu_host_page_size;

            prot = 0;
            for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
                p = page_find(addr >> TARGET_PAGE_BITS);
                p->flags |= PAGE_WRITE;
                prot |= p->flags;

                /* and since the content will be modified, we must invalidate
                   the corresponding translated code. */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(addr, pc);
            }
            mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
                     prot & PAGE_BITS);
        }
        mmap_unlock();
        /* If current TB was invalidated return to main loop */
        return current_tb_invalidated ? 2 : 1;
    }
    mmap_unlock();
    return 0;
}
#endif /* CONFIG_USER_ONLY */

/*
 * Called by generic code at e.g. cpu reset after cpu creation,
 * therefore we must be prepared to allocate the jump cache.
 */
void tcg_flush_jmp_cache(CPUState *cpu)
{
    CPUJumpCache *jc = cpu->tb_jmp_cache;

    /* During early initialization, the cache may not yet be allocated. */
    if (unlikely(jc == NULL)) {
        return;
    }

    for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
        qatomic_set(&jc->array[i].tb, NULL);
    }
}

/* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
void tcg_flush_softmmu_tlb(CPUState *cs)
{
#ifdef CONFIG_SOFTMMU
    tlb_flush(cs);
#endif
}