1 /* 2 * Host code generation 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 22 #define NO_CPU_IO_DEFS 23 #include "trace.h" 24 #include "disas/disas.h" 25 #include "exec/exec-all.h" 26 #include "tcg/tcg.h" 27 #if defined(CONFIG_USER_ONLY) 28 #include "qemu.h" 29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 30 #include <sys/param.h> 31 #if __FreeBSD_version >= 700104 32 #define HAVE_KINFO_GETVMMAP 33 #define sigqueue sigqueue_freebsd /* avoid redefinition */ 34 #include <sys/proc.h> 35 #include <machine/profile.h> 36 #define _KERNEL 37 #include <sys/user.h> 38 #undef _KERNEL 39 #undef sigqueue 40 #include <libutil.h> 41 #endif 42 #endif 43 #else 44 #include "exec/ram_addr.h" 45 #endif 46 47 #include "exec/cputlb.h" 48 #include "exec/translate-all.h" 49 #include "exec/translator.h" 50 #include "qemu/bitmap.h" 51 #include "qemu/qemu-print.h" 52 #include "qemu/timer.h" 53 #include "qemu/main-loop.h" 54 #include "qemu/cacheinfo.h" 55 #include "exec/log.h" 56 #include "sysemu/cpus.h" 57 #include "sysemu/cpu-timers.h" 58 #include "sysemu/tcg.h" 59 #include "qapi/error.h" 60 #include "hw/core/tcg-cpu-ops.h" 61 #include "tb-jmp-cache.h" 62 #include "tb-hash.h" 63 #include "tb-context.h" 64 #include "internal.h" 65 66 /* make various TB consistency 
/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page
 * @locked: whether the page is locked
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    /* The address of this field is used as the key when inserting in a tree. */
    tb_page_addr_t index;
    /* Set when the entry's page lock is taken; cleared on unlock. */
    bool locked;
};

/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index
 * @max:    Pointer to the page in @tree with the highest page index
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    GTree *tree;
    /* NULL until the first page has been inserted. */
    struct page_entry *max;
};
115 * 116 * TODO: For user mode, see the caveat re host vs guest virtual 117 * address spaces near GUEST_ADDR_MAX. 118 */ 119 #if !defined(CONFIG_USER_ONLY) 120 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS 121 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS 122 #else 123 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS 124 #endif 125 #else 126 # define L1_MAP_ADDR_SPACE_BITS MIN(HOST_LONG_BITS, TARGET_ABI_BITS) 127 #endif 128 129 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */ 130 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS > 131 sizeof_field(TranslationBlock, trace_vcpu_dstate) 132 * BITS_PER_BYTE); 133 134 /* 135 * L1 Mapping properties 136 */ 137 int v_l1_size; 138 int v_l1_shift; 139 int v_l2_levels; 140 141 void *l1_map[V_L1_MAX_SIZE]; 142 143 TBContext tb_ctx; 144 145 static void page_table_config_init(void) 146 { 147 uint32_t v_l1_bits; 148 149 assert(TARGET_PAGE_BITS); 150 /* The bits remaining after N lower levels of page tables. */ 151 v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS; 152 if (v_l1_bits < V_L1_MIN_BITS) { 153 v_l1_bits += V_L2_BITS; 154 } 155 156 v_l1_size = 1 << v_l1_bits; 157 v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits; 158 v_l2_levels = v_l1_shift / V_L2_BITS - 1; 159 160 assert(v_l1_bits <= V_L1_MAX_BITS); 161 assert(v_l1_shift % V_L2_BITS == 0); 162 assert(v_l2_levels >= 0); 163 } 164 165 /* Encode VAL as a signed leb128 sequence at P. 166 Return P incremented past the encoded value. */ 167 static uint8_t *encode_sleb128(uint8_t *p, target_long val) 168 { 169 int more, byte; 170 171 do { 172 byte = val & 0x7f; 173 val >>= 7; 174 more = !((val == 0 && (byte & 0x40) == 0) 175 || (val == -1 && (byte & 0x40) != 0)); 176 if (more) { 177 byte |= 0x80; 178 } 179 *p++ = byte; 180 } while (more); 181 182 return p; 183 } 184 185 /* Decode a signed leb128 sequence at *PP; increment *PP past the 186 decoded value. Return the decoded value. 
/* Decode a signed leb128 sequence at *PP; increment *PP past the
   decoded value.  Return the decoded value.  */
static target_long decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    target_long val = 0;
    int byte, shift = 0;

    /* Accumulate 7 payload bits per byte until the continuation bit clears. */
    do {
        byte = *p++;
        val |= (target_ulong)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    /* Sign-extend from the last group's sign bit if the value is narrower
       than target_long.  */
    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
        val |= -(target_ulong)1 << shift;
    }

    *pp = p;
    return val;
}

/* Encode the data collected about the instructions while compiling TB.
   Place the data at BLOCK, and return the number of bytes consumed.

   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
   which come from the target's insn_start data, followed by a uintptr_t
   which comes from the host pc of the end of the code implementing the insn.

   Each line of the table is encoded as sleb128 deltas from the previous
   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
   That is, the first column is seeded with the guest pc, the last column
   with the host pc, and the middle columns with zeros.

   Returns -1 if the encoded data runs past tcg_ctx->code_gen_highwater.  */

static int encode_search(TranslationBlock *tb, uint8_t *block)
{
    uint8_t *highwater = tcg_ctx->code_gen_highwater;
    uint8_t *p = block;
    int i, j, n;

    for (i = 0, n = tb->icount; i < n; ++i) {
        target_ulong prev;

        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            if (i == 0) {
                /* First row: only column 0 has a non-zero seed, and only
                   when TARGET_TB_PCREL is not set.  */
                prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
            } else {
                prev = tcg_ctx->gen_insn_data[i - 1][j];
            }
            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
        }
        /* Last column: delta of the end offset of this insn's host code. */
        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);

        /* Test for (pending) buffer overflow.  The assumption is that any
           one row beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           encoding a row without having to check during encoding.  */
        if (unlikely(p > highwater)) {
            return -1;
        }
    }

    return p - block;
}

/*
 * Decode the search data stored directly after the generated code of @tb
 * (at tb->tc.ptr + tb->tc.size), accumulating the per-instruction words
 * into @data, which must have room for TARGET_INSN_START_WORDS entries.
 *
 * Returns the number of instructions from the matching one up to the end
 * of the TB, or -1 if @host_pc lies before the TB's code or no
 * instruction's host code extends past it.
 */
static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
                                   uint64_t *data)
{
    uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
    const uint8_t *p = tb->tc.ptr + tb->tc.size;
    int i, j, num_insns = tb->icount;

    /* NOTE(review): presumably moves a return address back into the
       calling insn -- confirm against the definition of GETPC_ADJ.  */
    host_pc -= GETPC_ADJ;

    if (host_pc < iter_pc) {
        return -1;
    }

    /* Same seed as used by encode_search() for the first row. */
    memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
    if (!TARGET_TB_PCREL) {
        data[0] = tb_pc(tb);
    }

    /*
     * Reconstruct the stored insn data while looking for the point
     * at which the end of the insn exceeds host_pc.
     */
    for (i = 0; i < num_insns; ++i) {
        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            data[j] += decode_sleb128(&p);
        }
        iter_pc += decode_sleb128(&p);
        if (iter_pc > host_pc) {
            return num_insns - i;
        }
    }
    return -1;
}

/*
 * The cpu state corresponding to 'host_pc' is restored in
 * preparation for exiting the TB.
 *
 * Does nothing if 'host_pc' cannot be matched to an instruction of 'tb'.
 */
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                               uintptr_t host_pc)
{
    uint64_t data[TARGET_INSN_START_WORDS];
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti = profile_getclock();
#endif
    int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);

    if (insns_left < 0) {
        return;
    }

    if (tb_cflags(tb) & CF_USE_ICOUNT) {
        assert(icount_enabled());
        /*
         * Reset the cycle counter to the start of the block and
         * shift it to the number of actually executed instructions.
         */
        cpu_neg(cpu)->icount_decr.u16.low += insns_left;
    }

    cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->restore_time,
                prof->restore_time + profile_getclock() - ti);
    qatomic_set(&prof->restore_count, prof->restore_count + 1);
#endif
}

/*
 * Look up the TB containing 'host_pc' and restore the cpu state from it.
 * Returns true if a TB was found, false otherwise.
 */
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
{
    /*
     * The host_pc has to be in the rx region of the code buffer.
     * If it is not we will not be able to resolve it here.
     * The two cases where host_pc will not be correct are:
     *
     *  - fault during translation (instruction fetch)
     *  - fault from helper (not using GETPC() macro)
     *
     * Either way we need to return early as we can't resolve it here.
     */
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            cpu_restore_state_from_tb(cpu, tb, host_pc);
            return true;
        }
    }
    return false;
}

/*
 * Like cpu_restore_state(), but only fills 'data' with the unwound
 * insn words; the cpu state itself is not modified here.
 * Returns true only if a TB was found and 'host_pc' matched an insn in it.
 */
bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
{
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
        }
    }
    return false;
}

/*
 * Initialise the page size and the page table geometry.  On BSD
 * user-mode builds, additionally mark the address ranges that are
 * already mapped in the host process as PAGE_RESERVED.
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        /* End is not a valid guest address: reserve up to
                           the top of the address range instead.  */
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            /*
             * NOTE(review): the loop only ends on feof(); if fscanf()
             * ever fails to match without consuming input, this would
             * not terminate -- confirm the file is always well formed.
             */
            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}

/*
 * Return the PageDesc for page @index by walking the multi-level map
 * rooted at l1_map.  Missing intermediate tables and the final PageDesc
 * array are allocated on the way if @alloc is true; otherwise NULL is
 * returned as soon as a missing level is found.
 */
PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));

    /* Level 2..N-1.  */
    for (i = v_l2_levels; i > 0; i--) {
        void **p = qatomic_rcu_read(lp);

        if (p == NULL) {
            void *existing;

            if (!alloc) {
                return NULL;
            }
            p = g_new0(void *, V_L2_SIZE);
            /* Publish the new table; if the slot was filled in the
               meantime, drop ours and use the existing one.  */
            existing = qatomic_cmpxchg(lp, NULL, p);
            if (unlikely(existing)) {
                g_free(p);
                p = existing;
            }
        }

        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
    }

    pd = qatomic_rcu_read(lp);
    if (pd == NULL) {
        void *existing;

        if (!alloc) {
            return NULL;
        }
        pd = g_new0(PageDesc, V_L2_SIZE);
#ifndef CONFIG_USER_ONLY
        {
            int i;

            /* Locks must be initialised before the array is published. */
            for (i = 0; i < V_L2_SIZE; i++) {
                qemu_spin_init(&pd[i].lock);
            }
        }
#endif
        existing = qatomic_cmpxchg(lp, NULL, pd);
        if (unlikely(existing)) {
            /* Slot was filled in the meantime: tear down our copy. */
#ifndef CONFIG_USER_ONLY
            {
                int i;

                for (i = 0; i < V_L2_SIZE; i++) {
                    qemu_spin_destroy(&pd[i].lock);
                }
            }
#endif
            g_free(pd);
            pd = existing;
        }
    }

    return pd + (index & (V_L2_SIZE - 1));
}

/* In user-mode page locks aren't used; mmap_lock is enough */
#ifdef CONFIG_USER_ONLY
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    return NULL;
}

void page_collection_unlock(struct page_collection *set)
{ }
#else /* !CONFIG_USER_ONLY */

#ifdef CONFIG_DEBUG_TCG

/* Per-thread set of the PageDescs currently locked by this thread. */
static __thread GHashTable *ht_pages_locked_debug;

/* Create the per-thread table on first use. */
static void ht_pages_locked_debug_init(void)
{
    if (ht_pages_locked_debug) {
        return;
    }
    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
}

/* Return true if @pd is recorded as locked by the calling thread. */
static bool page_is_locked(const PageDesc *pd)
{
    PageDesc *found;

    ht_pages_locked_debug_init();
    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
    return !!found;
}

/* Record @pd as locked; asserts that it was not already recorded. */
static void page_lock__debug(PageDesc *pd)
{
    ht_pages_locked_debug_init();
    g_assert(!page_is_locked(pd));
    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
}

/* Remove @pd from the locked set; asserts that it was recorded. */
static void page_unlock__debug(const PageDesc *pd)
{
    bool removed;

    ht_pages_locked_debug_init();
    g_assert(page_is_locked(pd));
    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
    g_assert(removed);
}

/* Report and abort if @pd is not recorded as locked by this thread. */
void do_assert_page_locked(const PageDesc *pd, const char *file, int line)
{
    if (unlikely(!page_is_locked(pd))) {
        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
                     pd, file, line);
        abort();
    }
}

/* Assert that the calling thread has no page recorded as locked. */
void assert_no_pages_locked(void)
{
    ht_pages_locked_debug_init();
    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
}

#else /* !CONFIG_DEBUG_TCG */

static inline void page_lock__debug(const PageDesc *pd) { }
static inline void page_unlock__debug(const PageDesc *pd) { }

#endif /* CONFIG_DEBUG_TCG */

/* Take the spinlock of @pd (with debug bookkeeping, if enabled). */
void page_lock(PageDesc *pd)
{
    page_lock__debug(pd);
    qemu_spin_lock(&pd->lock);
}

/* Release the spinlock of @pd (with debug bookkeeping, if enabled). */
void page_unlock(PageDesc *pd)
{
    qemu_spin_unlock(&pd->lock);
    page_unlock__debug(pd);
}

/* Allocate an unlocked entry for page @index described by @pd. */
static inline struct page_entry *
page_entry_new(PageDesc *pd, tb_page_addr_t index)
{
    struct page_entry *pe = g_malloc(sizeof(*pe));

    pe->index = index;
    pe->pd = pd;
    pe->locked = false;
    return pe;
}

/*
 * Value-destroy callback of the collection's tree: unlock the page and
 * free the entry.  The entry is expected to be locked at this point.
 */
static void page_entry_destroy(gpointer p)
{
    struct page_entry *pe = p;

    g_assert(pe->locked);
    page_unlock(pe->pd);
    g_free(pe);
}

/* returns false on success */
static bool page_entry_trylock(struct page_entry *pe)
{
    bool busy;

    busy = qemu_spin_trylock(&pe->pd->lock);
    if (!busy) {
        g_assert(!pe->locked);
        pe->locked = true;
        page_lock__debug(pe->pd);
    }
    return busy;
}

/* Lock the page of @pe (blocking) and mark the entry as locked. */
static void do_page_entry_lock(struct page_entry *pe)
{
    page_lock(pe->pd);
    g_assert(!pe->locked);
    pe->locked = true;
}

/* g_tree_foreach() callback: lock one entry; FALSE keeps iterating. */
static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    do_page_entry_lock(pe);
    return FALSE;
}

/* g_tree_foreach() callback: unlock one entry if it is locked. */
static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    if (pe->locked) {
        pe->locked = false;
        page_unlock(pe->pd);
    }
    return FALSE;
}

/*
 * Trylock a page, and if successful, add the page to a collection.
 * Returns true ("busy") if the page could not be locked; false otherwise.
 *
 * Pages already in the collection, and pages without a PageDesc, are
 * reported as not busy without doing anything.
 */
static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
{
    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
    struct page_entry *pe;
    PageDesc *pd;

    pe = g_tree_lookup(set->tree, &index);
    if (pe) {
        return false;
    }

    pd = page_find(index);
    if (pd == NULL) {
        return false;
    }

    /* The entry is inserted even if the trylock below ends up failing. */
    pe = page_entry_new(pd, index);
    g_tree_insert(set->tree, &pe->index, pe);

    /*
     * If this is either (1) the first insertion or (2) a page whose index
     * is higher than any other so far, just lock the page and move on.
     */
    if (set->max == NULL || pe->index > set->max->index) {
        set->max = pe;
        do_page_entry_lock(pe);
        return false;
    }
    /*
     * Try to acquire out-of-order lock; if busy, return busy so that we acquire
     * locks in order.
     */
    return page_entry_trylock(pe);
}

/* Three-way comparison of two page indexes, for use as the tree's key order. */
static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
{
    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
    tb_page_addr_t b = *(const tb_page_addr_t *)bp;

    if (a == b) {
        return 0;
    } else if (a < b) {
        return -1;
    }
    return 1;
}

/*
 * Lock a range of pages ([@start,@end[) as well as the pages of all
 * intersecting TBs.
 * Locking order: acquire locks in ascending order of page index.
 */
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    struct page_collection *set = g_malloc(sizeof(*set));
    tb_page_addr_t index;
    PageDesc *pd;

    /* From here on @start and @end are page indexes, not addresses. */
    start >>= TARGET_PAGE_BITS;
    end   >>= TARGET_PAGE_BITS;
    g_assert(start <= end);

    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
                                page_entry_destroy);
    set->max = NULL;
    assert_no_pages_locked();

 retry:
    /* (Re)lock every entry collected so far, in tree (ascending) order. */
    g_tree_foreach(set->tree, page_entry_lock, NULL);

    for (index = start; index <= end; index++) {
        TranslationBlock *tb;
        int n;

        pd = page_find(index);
        if (pd == NULL) {
            continue;
        }
        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
            g_tree_foreach(set->tree, page_entry_unlock, NULL);
            goto retry;
        }
        assert_page_locked(pd);
        PAGE_FOR_EACH_TB(pd, tb, n) {
            if (page_trylock_add(set, tb_page_addr0(tb)) ||
                (tb_page_addr1(tb) != -1 &&
                 page_trylock_add(set, tb_page_addr1(tb)))) {
                /* drop all locks, and reacquire in order */
                g_tree_foreach(set->tree, page_entry_unlock, NULL);
                goto retry;
            }
        }
    }
    return set;
}

/* Unlock every page in @set and free the collection. */
void page_collection_unlock(struct page_collection *set)
{
    /* entries are unlocked and freed via page_entry_destroy */
    g_tree_destroy(set->tree);
    g_free(set);
}

#endif /* !CONFIG_USER_ONLY */
/*
 * Isolate the portion of code gen which can setjmp/longjmp.
 * Return the size of the generated code, or negative on error.
 *
 * On return, *max_insns holds the number of instructions actually
 * translated (tb->icount).  *ti is only read and updated when
 * CONFIG_PROFILER is defined.
 */
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
                           target_ulong pc, void *host_pc,
                           int *max_insns, int64_t *ti)
{
    /* A non-zero value means we got here via a jump to jmp_trans. */
    int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(ret != 0)) {
        return ret;
    }

    tcg_func_start(tcg_ctx);

    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(env_cpu(env), tb, *max_insns, pc, host_pc);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    *max_insns = tb->icount;

#ifdef CONFIG_PROFILER
    qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
    qatomic_set(&tcg_ctx->prof.interm_time,
                tcg_ctx->prof.interm_time + profile_getclock() - *ti);
    *ti = profile_getclock();
#endif

    return tcg_gen_code(tcg_ctx, tb, pc);
}

/*
 * Translate the guest code at @pc into a new TranslationBlock.
 *
 * Returns the new TB, or an already existing equivalent TB if
 * tb_link_page() reports one, in which case the fresh translation is
 * discarded.  If no TB can be allocated, the TBs are flushed and the
 * function exits through cpu_loop_exit() instead of returning.
 *
 * Called with mmap_lock held for user mode emulation.
 */
TranslationBlock *tb_gen_code(CPUState *cpu,
                              target_ulong pc, target_ulong cs_base,
                              uint32_t flags, int cflags)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb, *existing_tb;
    tb_page_addr_t phys_pc;
    tcg_insn_unit *gen_code_buf;
    int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
#endif
    int64_t ti;
    void *host_pc;

    assert_memory_lock();
    qemu_thread_jit_write();

    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);

    if (phys_pc == -1) {
        /* Generate a one-shot TB with 1 insn in it */
        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
    }

    /* A count of 0 in cflags means "no explicit limit". */
    max_insns = cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = TCG_MAX_INSNS;
    }
    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);

 buffer_overflow:
    tb = tcg_tb_alloc(tcg_ctx);
    if (unlikely(!tb)) {
        /* flush must be done */
        tb_flush(cpu);
        mmap_unlock();
        /* Make the execution loop process the flush as soon as possible.  */
        cpu->exception_index = EXCP_INTERRUPT;
        cpu_loop_exit(cpu);
    }

    gen_code_buf = tcg_ctx->code_gen_ptr;
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
#if !TARGET_TB_PCREL
    tb->pc = pc;
#endif
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    tb->trace_vcpu_dstate = *cpu->trace_dstate;
    tb_set_page_addr0(tb, phys_pc);
    tb_set_page_addr1(tb, -1);
    tcg_ctx->tb_cflags = cflags;
 tb_overflow:

#ifdef CONFIG_PROFILER
    /* includes aborted translations because of exceptions */
    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
    ti = profile_getclock();
#endif

    trace_translate_block(tb, pc, tb->tc.ptr);

    gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
    if (unlikely(gen_code_size < 0)) {
        switch (gen_code_size) {
        case -1:
            /*
             * Overflow of code_gen_buffer, or the current slice of it.
             *
             * TODO: We don't need to re-do gen_intermediate_code, nor
             * should we re-do the tcg optimization currently hidden
             * inside tcg_gen_code.  All that should be required is to
             * flush the TBs, allocate a new TB, re-initialize it per
             * above, and re-do the actual code generation.
             */
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation for "
                          "code_gen_buffer overflow\n");
            goto buffer_overflow;

        case -2:
            /*
             * The code generated for the TranslationBlock is too large.
             * The maximum size allowed by the unwind info is 64k.
             * There may be stricter constraints from relocations
             * in the tcg backend.
             *
             * Try again with half as many insns as we attempted this time.
             * If a single insn overflows, there's a bug somewhere...
             */
            assert(max_insns > 1);
            max_insns /= 2;
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation with "
                          "smaller translation block (max %d insns)\n",
                          max_insns);
            goto tb_overflow;

        default:
            g_assert_not_reached();
        }
    }
    /* The unwind (search) data is stored right after the generated code. */
    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    if (unlikely(search_size < 0)) {
        goto buffer_overflow;
    }
    tb->tc.size = gen_code_size;

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
        qemu_log_in_addr_range(pc)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            int code_size, data_size;
            const tcg_target_ulong *rx_data_gen_ptr;
            size_t chunk_start;
            int insn = 0;

            /* Split the output into code and trailing data, if any. */
            if (tcg_ctx->data_gen_ptr) {
                rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
                code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
                data_size = gen_code_size - code_size;
            } else {
                rx_data_gen_ptr = 0;
                code_size = gen_code_size;
                data_size = 0;
            }

            /* Dump header and the first instruction */
            fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
            fprintf(logfile,
                    " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
                    tcg_ctx->gen_insn_data[insn][0]);
            chunk_start = tcg_ctx->gen_insn_end_off[insn];
            disas(logfile, tb->tc.ptr, chunk_start);

            /*
             * Dump each instruction chunk, wrapping up empty chunks into
             * the next instruction. The whole array is offset so the
             * first entry is the beginning of the 2nd instruction.
             */
            while (insn < tb->icount) {
                size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
                if (chunk_end > chunk_start) {
                    fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
                            tcg_ctx->gen_insn_data[insn][0]);
                    disas(logfile, tb->tc.ptr + chunk_start,
                          chunk_end - chunk_start);
                    chunk_start = chunk_end;
                }
                insn++;
            }

            if (chunk_start < code_size) {
                fprintf(logfile, " -- tb slow paths + alignment\n");
                disas(logfile, tb->tc.ptr + chunk_start,
                      code_size - chunk_start);
            }

            /* Finally dump any data we may have after the block */
            if (data_size) {
                int i;
                fprintf(logfile, " data: [size=%d]\n", data_size);
                for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else if (sizeof(tcg_target_ulong) == 4) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else {
                        qemu_build_not_reached();
                    }
                }
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Advance the allocation pointer past the code and the search data. */
    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
                 CODE_GEN_ALIGN));

    /* init jump list */
    qemu_spin_init(&tb->jmp_lock);
    tb->jmp_list_head = (uintptr_t)NULL;
    tb->jmp_list_next[0] = (uintptr_t)NULL;
    tb->jmp_list_next[1] = (uintptr_t)NULL;
    tb->jmp_dest[0] = (uintptr_t)NULL;
    tb->jmp_dest[1] = (uintptr_t)NULL;

    /* init original jump addresses which have been set during tcg_gen_code() */
    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 0);
    }
    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 1);
    }

    /*
     * If the TB is not associated with a physical RAM page then it must be
     * a temporary one-insn TB, and we have nothing left to do. Return early
     * before attempting to link to other TBs or add to the lookup table.
     */
    if (tb_page_addr0(tb) == -1) {
        return tb;
    }

    /*
     * Insert TB into the corresponding region tree before publishing it
     * through QHT. Otherwise rewinding happened in the TB might fail to
     * lookup itself using host PC.
     */
    tcg_tb_insert(tb);

    /*
     * No explicit memory barrier is required -- tb_link_page() makes the
     * TB visible in a consistent state.
     */
    existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
    /* if the TB already exists, discard what we just translated */
    if (unlikely(existing_tb != tb)) {
        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;

        /* Rewind the allocation pointer to before this TB's descriptor. */
        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
        tcg_tb_remove(tb);
        return existing_tb;
    }
    return tb;
}

/*
 * Invalidate the TB in which a watchpoint was hit, identified by the
 * host return address @retaddr, after restoring the cpu state from it.
 *
 * user-mode: call with mmap_lock held
 */
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;

    assert_memory_lock();

    tb = tcg_tb_lookup(retaddr);
    if (tb) {
        /* We can use retranslation to find the PC.  */
        cpu_restore_state_from_tb(cpu, tb, retaddr);
        tb_phys_invalidate(tb, -1);
    } else {
        /* The exception probably happened in a helper.  The CPU state should
           have been saved before calling it. Fetch the PC from there.  */
        CPUArchState *env = cpu->env_ptr;
        target_ulong pc, cs_base;
        tb_page_addr_t addr;
        uint32_t flags;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
        addr = get_page_addr_code(env, pc);
        if (addr != -1) {
            tb_invalidate_phys_range(addr, addr + 1);
        }
    }
}

#ifndef CONFIG_USER_ONLY
/*
 * In deterministic execution mode, instructions doing device I/Os
 * must be at the end of the TB.
 *
 * Called by softmmu_template.h, with iothread mutex not held.
 *
 * Does not return: exits via cpu_loop_exit_noexc(), or via cpu_abort()
 * if no TB is found for @retaddr.
 */
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;
    CPUClass *cc;
    uint32_t n;

    tb = tcg_tb_lookup(retaddr);
    if (!tb) {
        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    cpu_restore_state_from_tb(cpu, tb, retaddr);

    /*
     * Some guests must re-execute the branch when re-executing a delay
     * slot instruction.  When this is the case, adjust icount and N
     * to account for the re-execution of the branch.
     */
    n = 1;
    cc = CPU_GET_CLASS(cpu);
    if (cc->tcg_ops->io_recompile_replay_branch &&
        cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
        cpu_neg(cpu)->icount_decr.u16.low++;
        n = 2;
    }

    /*
     * Exit the loop and potentially generate a new TB executing
     * just the I/O insns. We also limit instrumentation to memory
     * operations only (which execute after completion) so we don't
     * double instrument the instruction.
     */
    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;

    if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
        target_ulong pc = log_pc(cpu, tb);
        if (qemu_log_in_addr_range(pc)) {
            qemu_log("cpu_io_recompile: rewound execution of TB to "
                     TARGET_FMT_lx "\n", pc);
        }
    }

    cpu_loop_exit_noexc(cpu);
}

/*
 * Append a summary of the TB hash table statistics @hst to @buf:
 * head bucket usage, plus occupancy and chain-length histograms.
 * Nothing is appended if the table has no head buckets.
 */
static void print_qht_statistics(struct qht_stats hst, GString *buf)
{
    uint32_t hgram_opts;
    size_t hgram_bins;
    char *hgram;

    if (!hst.head_buckets) {
        return;
    }
    g_string_append_printf(buf, "TB hash buckets %zu/%zu "
                           "(%0.2f%% head buckets used)\n",
                           hst.used_head_buckets, hst.head_buckets,
                           (double)hst.used_head_buckets /
                           hst.head_buckets * 100);

    hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
        hgram_opts |= QDIST_PR_NODECIMAL;
    }
    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
    g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.occupancy) * 100, hgram);
    g_free(hgram);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
    /* Cap at 10 bins for wide ranges; narrow ranges pass 0 bins instead. */
    if (hgram_bins > 10) {
        hgram_bins = 10;
    } else {
        hgram_bins = 0;
        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
    }
    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
    g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.chain), hgram);
    g_free(hgram);
}

/* Totals accumulated over all TBs by tb_tree_stats_iter(). */
struct tb_tree_stats {
    size_t nb_tbs;
    size_t host_size;
    size_t target_size;
    size_t max_target_size;
    size_t direct_jmp_count;
    size_t direct_jmp2_count;
    size_t cross_page;
};

/*
 * Per-TB callback: fold the TB passed as @value into the
 * struct tb_tree_stats passed as @data.  Always returns false.
 */
static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
{
    const TranslationBlock *tb = value;
    struct tb_tree_stats *tst = data;

    tst->nb_tbs++;
    tst->host_size += tb->tc.size;
    tst->target_size += tb->size;
    if (tb->size > tst->max_target_size) {
        tst->max_target_size = tb->size;
    }
    if (tb_page_addr1(tb) != -1) {
        tst->cross_page++;
    }
    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
        tst->direct_jmp_count++;
        /* Only counted when the first jump slot is in use as well. */
        if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
            tst->direct_jmp2_count++;
        }
    }
    return false;
}

/*
 * Append a report on the translation buffer, the TB hash table and
 * the flush/invalidate counters to @buf.
 */
void dump_exec_info(GString *buf)
{
    struct tb_tree_stats tst = {};
    struct qht_stats hst;
    size_t nb_tbs, flush_full, flush_part, flush_elide;

    tcg_tb_foreach(tb_tree_stats_iter, &tst);
    nb_tbs = tst.nb_tbs;
    /* XXX: avoid using doubles ? */
    g_string_append_printf(buf, "Translation buffer state:\n");
    /*
     * Report total code size including the padding and TB structs;
     * otherwise users might think "-accel tcg,tb-size" is not honoured.
     * For avg host size we use the precise numbers from tb_tree_stats though.
     */
    g_string_append_printf(buf, "gen code size %zu/%zu\n",
                           tcg_code_size(), tcg_code_capacity());
    g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
    /* The averages and percentages below report 0 when the divisor is 0. */
    g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
                           nb_tbs ? tst.target_size / nb_tbs : 0,
                           tst.max_target_size);
    g_string_append_printf(buf, "TB avg host size %zu bytes "
                           "(expansion ratio: %0.1f)\n",
                           nb_tbs ? tst.host_size / nb_tbs : 0,
                           tst.target_size ?
                           (double)tst.host_size / tst.target_size : 0);
    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
                           tst.cross_page,
                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
    g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
                           "(2 jumps=%zu %zu%%)\n",
                           tst.direct_jmp_count,
                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
                           tst.direct_jmp2_count,
                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);

    qht_statistics_init(&tb_ctx.htable, &hst);
    print_qht_statistics(hst, buf);
    qht_statistics_destroy(&hst);

    g_string_append_printf(buf, "\nStatistics:\n");
    g_string_append_printf(buf, "TB flush count %u\n",
                           qatomic_read(&tb_ctx.tb_flush_count));
    g_string_append_printf(buf, "TB invalidate count %u\n",
                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));

    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
    g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
    g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
    tcg_dump_info(buf);
}

#else /* CONFIG_USER_ONLY */

/*
 * Add @mask to the pending interrupt requests of @cpu and set the high
 * half of icount_decr to -1.  Must be called with the iothread mutex held.
 */
void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());
    cpu->interrupt_request |= mask;
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}

/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
1241 */ 1242 struct walk_memory_regions_data { 1243 walk_memory_regions_fn fn; 1244 void *priv; 1245 target_ulong start; 1246 int prot; 1247 }; 1248 1249 static int walk_memory_regions_end(struct walk_memory_regions_data *data, 1250 target_ulong end, int new_prot) 1251 { 1252 if (data->start != -1u) { 1253 int rc = data->fn(data->priv, data->start, end, data->prot); 1254 if (rc != 0) { 1255 return rc; 1256 } 1257 } 1258 1259 data->start = (new_prot ? end : -1u); 1260 data->prot = new_prot; 1261 1262 return 0; 1263 } 1264 1265 static int walk_memory_regions_1(struct walk_memory_regions_data *data, 1266 target_ulong base, int level, void **lp) 1267 { 1268 target_ulong pa; 1269 int i, rc; 1270 1271 if (*lp == NULL) { 1272 return walk_memory_regions_end(data, base, 0); 1273 } 1274 1275 if (level == 0) { 1276 PageDesc *pd = *lp; 1277 1278 for (i = 0; i < V_L2_SIZE; ++i) { 1279 int prot = pd[i].flags; 1280 1281 pa = base | (i << TARGET_PAGE_BITS); 1282 if (prot != data->prot) { 1283 rc = walk_memory_regions_end(data, pa, prot); 1284 if (rc != 0) { 1285 return rc; 1286 } 1287 } 1288 } 1289 } else { 1290 void **pp = *lp; 1291 1292 for (i = 0; i < V_L2_SIZE; ++i) { 1293 pa = base | ((target_ulong)i << 1294 (TARGET_PAGE_BITS + V_L2_BITS * level)); 1295 rc = walk_memory_regions_1(data, pa, level - 1, pp + i); 1296 if (rc != 0) { 1297 return rc; 1298 } 1299 } 1300 } 1301 1302 return 0; 1303 } 1304 1305 int walk_memory_regions(void *priv, walk_memory_regions_fn fn) 1306 { 1307 struct walk_memory_regions_data data; 1308 uintptr_t i, l1_sz = v_l1_size; 1309 1310 data.fn = fn; 1311 data.priv = priv; 1312 data.start = -1u; 1313 data.prot = 0; 1314 1315 for (i = 0; i < l1_sz; i++) { 1316 target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS); 1317 int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i); 1318 if (rc != 0) { 1319 return rc; 1320 } 1321 } 1322 1323 return walk_memory_regions_end(&data, 0, 0); 1324 } 1325 1326 static int dump_region(void *priv, 
target_ulong start, 1327 target_ulong end, unsigned long prot) 1328 { 1329 FILE *f = (FILE *)priv; 1330 1331 (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx 1332 " "TARGET_FMT_lx" %c%c%c\n", 1333 start, end, end - start, 1334 ((prot & PAGE_READ) ? 'r' : '-'), 1335 ((prot & PAGE_WRITE) ? 'w' : '-'), 1336 ((prot & PAGE_EXEC) ? 'x' : '-')); 1337 1338 return 0; 1339 } 1340 1341 /* dump memory mappings */ 1342 void page_dump(FILE *f) 1343 { 1344 const int length = sizeof(target_ulong) * 2; 1345 (void) fprintf(f, "%-*s %-*s %-*s %s\n", 1346 length, "start", length, "end", length, "size", "prot"); 1347 walk_memory_regions(f, dump_region); 1348 } 1349 1350 int page_get_flags(target_ulong address) 1351 { 1352 PageDesc *p; 1353 1354 p = page_find(address >> TARGET_PAGE_BITS); 1355 if (!p) { 1356 return 0; 1357 } 1358 return p->flags; 1359 } 1360 1361 /* 1362 * Allow the target to decide if PAGE_TARGET_[12] may be reset. 1363 * By default, they are not kept. 1364 */ 1365 #ifndef PAGE_TARGET_STICKY 1366 #define PAGE_TARGET_STICKY 0 1367 #endif 1368 #define PAGE_STICKY (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY) 1369 1370 /* Modify the flags of a page and invalidate the code if necessary. 1371 The flag PAGE_WRITE_ORG is positioned automatically depending 1372 on PAGE_WRITE. The mmap_lock should already be held. */ 1373 void page_set_flags(target_ulong start, target_ulong end, int flags) 1374 { 1375 target_ulong addr, len; 1376 bool reset, inval_tb = false; 1377 1378 /* This function should never be called with addresses outside the 1379 guest address space. If this assert fires, it probably indicates 1380 a missing call to h2g_valid. */ 1381 assert(end - 1 <= GUEST_ADDR_MAX); 1382 assert(start < end); 1383 /* Only set PAGE_ANON with new mappings. 
*/ 1384 assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET)); 1385 assert_memory_lock(); 1386 1387 start = start & TARGET_PAGE_MASK; 1388 end = TARGET_PAGE_ALIGN(end); 1389 1390 if (flags & PAGE_WRITE) { 1391 flags |= PAGE_WRITE_ORG; 1392 } 1393 reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET); 1394 if (reset) { 1395 page_reset_target_data(start, end); 1396 } 1397 flags &= ~PAGE_RESET; 1398 1399 for (addr = start, len = end - start; 1400 len != 0; 1401 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { 1402 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true); 1403 1404 /* 1405 * If the page was executable, but is reset, or is no longer 1406 * executable, or has become writable, then invalidate any code. 1407 */ 1408 if ((p->flags & PAGE_EXEC) 1409 && (reset || 1410 !(flags & PAGE_EXEC) || 1411 (flags & ~p->flags & PAGE_WRITE))) { 1412 inval_tb = true; 1413 } 1414 /* Using mprotect on a page does not change sticky bits. */ 1415 p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags; 1416 } 1417 1418 if (inval_tb) { 1419 tb_invalidate_phys_range(start, end); 1420 } 1421 } 1422 1423 int page_check_range(target_ulong start, target_ulong len, int flags) 1424 { 1425 PageDesc *p; 1426 target_ulong end; 1427 target_ulong addr; 1428 1429 /* This function should never be called with addresses outside the 1430 guest address space. If this assert fires, it probably indicates 1431 a missing call to h2g_valid. */ 1432 if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) { 1433 assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS)); 1434 } 1435 1436 if (len == 0) { 1437 return 0; 1438 } 1439 if (start + len - 1 < start) { 1440 /* We've wrapped around. 
*/ 1441 return -1; 1442 } 1443 1444 /* must do before we loose bits in the next step */ 1445 end = TARGET_PAGE_ALIGN(start + len); 1446 start = start & TARGET_PAGE_MASK; 1447 1448 for (addr = start, len = end - start; 1449 len != 0; 1450 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { 1451 p = page_find(addr >> TARGET_PAGE_BITS); 1452 if (!p) { 1453 return -1; 1454 } 1455 if (!(p->flags & PAGE_VALID)) { 1456 return -1; 1457 } 1458 1459 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) { 1460 return -1; 1461 } 1462 if (flags & PAGE_WRITE) { 1463 if (!(p->flags & PAGE_WRITE_ORG)) { 1464 return -1; 1465 } 1466 /* unprotect the page if it was put read-only because it 1467 contains translated code */ 1468 if (!(p->flags & PAGE_WRITE)) { 1469 if (!page_unprotect(addr, 0)) { 1470 return -1; 1471 } 1472 } 1473 } 1474 } 1475 return 0; 1476 } 1477 1478 void page_protect(tb_page_addr_t page_addr) 1479 { 1480 target_ulong addr; 1481 PageDesc *p; 1482 int prot; 1483 1484 p = page_find(page_addr >> TARGET_PAGE_BITS); 1485 if (p && (p->flags & PAGE_WRITE)) { 1486 /* 1487 * Force the host page as non writable (writes will have a page fault + 1488 * mprotect overhead). 1489 */ 1490 page_addr &= qemu_host_page_mask; 1491 prot = 0; 1492 for (addr = page_addr; addr < page_addr + qemu_host_page_size; 1493 addr += TARGET_PAGE_SIZE) { 1494 1495 p = page_find(addr >> TARGET_PAGE_BITS); 1496 if (!p) { 1497 continue; 1498 } 1499 prot |= p->flags; 1500 p->flags &= ~PAGE_WRITE; 1501 } 1502 mprotect(g2h_untagged(page_addr), qemu_host_page_size, 1503 (prot & PAGE_BITS) & ~PAGE_WRITE); 1504 } 1505 } 1506 1507 /* called from signal handler: invalidate the code and unprotect the 1508 * page. Return 0 if the fault was not handled, 1 if it was handled, 1509 * and 2 if it was handled but the caller must cause the TB to be 1510 * immediately exited. (We can only return 2 if the 'pc' argument is 1511 * non-zero.) 
1512 */ 1513 int page_unprotect(target_ulong address, uintptr_t pc) 1514 { 1515 unsigned int prot; 1516 bool current_tb_invalidated; 1517 PageDesc *p; 1518 target_ulong host_start, host_end, addr; 1519 1520 /* Technically this isn't safe inside a signal handler. However we 1521 know this only ever happens in a synchronous SEGV handler, so in 1522 practice it seems to be ok. */ 1523 mmap_lock(); 1524 1525 p = page_find(address >> TARGET_PAGE_BITS); 1526 if (!p) { 1527 mmap_unlock(); 1528 return 0; 1529 } 1530 1531 /* if the page was really writable, then we change its 1532 protection back to writable */ 1533 if (p->flags & PAGE_WRITE_ORG) { 1534 current_tb_invalidated = false; 1535 if (p->flags & PAGE_WRITE) { 1536 /* If the page is actually marked WRITE then assume this is because 1537 * this thread raced with another one which got here first and 1538 * set the page to PAGE_WRITE and did the TB invalidate for us. 1539 */ 1540 #ifdef TARGET_HAS_PRECISE_SMC 1541 TranslationBlock *current_tb = tcg_tb_lookup(pc); 1542 if (current_tb) { 1543 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID; 1544 } 1545 #endif 1546 } else { 1547 host_start = address & qemu_host_page_mask; 1548 host_end = host_start + qemu_host_page_size; 1549 1550 prot = 0; 1551 for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) { 1552 p = page_find(addr >> TARGET_PAGE_BITS); 1553 p->flags |= PAGE_WRITE; 1554 prot |= p->flags; 1555 1556 /* and since the content will be modified, we must invalidate 1557 the corresponding translated code. */ 1558 current_tb_invalidated |= 1559 tb_invalidate_phys_page_unwind(addr, pc); 1560 } 1561 mprotect((void *)g2h_untagged(host_start), qemu_host_page_size, 1562 prot & PAGE_BITS); 1563 } 1564 mmap_unlock(); 1565 /* If current TB was invalidated return to main loop */ 1566 return current_tb_invalidated ? 2 : 1; 1567 } 1568 mmap_unlock(); 1569 return 0; 1570 } 1571 #endif /* CONFIG_USER_ONLY */ 1572 1573 /* 1574 * Called by generic code at e.g. 
cpu reset after cpu creation, 1575 * therefore we must be prepared to allocate the jump cache. 1576 */ 1577 void tcg_flush_jmp_cache(CPUState *cpu) 1578 { 1579 CPUJumpCache *jc = cpu->tb_jmp_cache; 1580 1581 /* During early initialization, the cache may not yet be allocated. */ 1582 if (unlikely(jc == NULL)) { 1583 return; 1584 } 1585 1586 for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { 1587 qatomic_set(&jc->array[i].tb, NULL); 1588 } 1589 } 1590 1591 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */ 1592 void tcg_flush_softmmu_tlb(CPUState *cs) 1593 { 1594 #ifdef CONFIG_SOFTMMU 1595 tlb_flush(cs); 1596 #endif 1597 } 1598