1 /* 2 * Memory region management for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu/osdep.h" 26 #include "qemu/units.h" 27 #include "qemu/madvise.h" 28 #include "qemu/mprotect.h" 29 #include "qemu/cacheinfo.h" 30 #include "qapi/error.h" 31 #include "exec/exec-all.h" 32 #include "tcg/tcg.h" 33 #include "tcg-internal.h" 34 35 36 struct tcg_region_tree { 37 QemuMutex lock; 38 GTree *tree; 39 /* padding to avoid false sharing is computed at run-time */ 40 }; 41 42 /* 43 * We divide code_gen_buffer into equally-sized "regions" that TCG threads 44 * dynamically allocate from as demand dictates. Given appropriate region 45 * sizing, this minimizes flushes even when some TCG threads generate a lot 46 * more code than others. 47 */ 48 struct tcg_region_state { 49 QemuMutex lock; 50 51 /* fields set at init time */ 52 void *start_aligned; 53 void *after_prologue; 54 size_t n; 55 size_t size; /* size of one region */ 56 size_t stride; /* .size + guard size */ 57 size_t total_size; /* size of entire buffer, >= n * stride */ 58 59 /* fields protected by the lock */ 60 size_t current; /* current region index */ 61 size_t agg_size_full; /* aggregate size of full regions */ 62 }; 63 64 static struct tcg_region_state region; 65 66 /* 67 * This is an array of struct tcg_region_tree's, with padding. 68 * We use void * to simplify the computation of region_trees[i]; each 69 * struct is found every tree_size bytes. 70 */ 71 static void *region_trees; 72 static size_t tree_size; 73 74 bool in_code_gen_buffer(const void *p) 75 { 76 /* 77 * Much like it is valid to have a pointer to the byte past the 78 * end of an array (so long as you don't dereference it), allow 79 * a pointer to the byte past the end of the code gen buffer. 80 */ 81 return (size_t)(p - region.start_aligned) <= region.total_size; 82 } 83 84 #ifdef CONFIG_DEBUG_TCG 85 const void *tcg_splitwx_to_rx(void *rw) 86 { 87 /* Pass NULL pointers unchanged. */ 88 if (rw) { 89 g_assert(in_code_gen_buffer(rw)); 90 rw += tcg_splitwx_diff; 91 } 92 return rw; 93 } 94 95 void *tcg_splitwx_to_rw(const void *rx) 96 { 97 /* Pass NULL pointers unchanged. */ 98 if (rx) { 99 rx -= tcg_splitwx_diff; 100 /* Assert that we end with a pointer in the rw region. */ 101 g_assert(in_code_gen_buffer(rx)); 102 } 103 return (void *)rx; 104 } 105 #endif /* CONFIG_DEBUG_TCG */ 106 107 /* compare a pointer @ptr and a tb_tc @s */ 108 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s) 109 { 110 if (ptr >= s->ptr + s->size) { 111 return 1; 112 } else if (ptr < s->ptr) { 113 return -1; 114 } 115 return 0; 116 } 117 118 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata) 119 { 120 const struct tb_tc *a = ap; 121 const struct tb_tc *b = bp; 122 123 /* 124 * When both sizes are set, we know this isn't a lookup. 125 * This is the most likely case: every TB must be inserted; lookups 126 * are a lot less frequent. 127 */ 128 if (likely(a->size && b->size)) { 129 if (a->ptr > b->ptr) { 130 return 1; 131 } else if (a->ptr < b->ptr) { 132 return -1; 133 } 134 /* a->ptr == b->ptr should happen only on deletions */ 135 g_assert(a->size == b->size); 136 return 0; 137 } 138 /* 139 * All lookups have either .size field set to 0. 140 * From the glib sources we see that @ap is always the lookup key. However 141 * the docs provide no guarantee, so we just mark this case as likely. 142 */ 143 if (likely(a->size == 0)) { 144 return ptr_cmp_tb_tc(a->ptr, b); 145 } 146 return ptr_cmp_tb_tc(b->ptr, a); 147 } 148 149 static void tb_destroy(gpointer value) 150 { 151 TranslationBlock *tb = value; 152 qemu_spin_destroy(&tb->jmp_lock); 153 } 154 155 static void tcg_region_trees_init(void) 156 { 157 size_t i; 158 159 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize); 160 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size); 161 for (i = 0; i < region.n; i++) { 162 struct tcg_region_tree *rt = region_trees + i * tree_size; 163 164 qemu_mutex_init(&rt->lock); 165 rt->tree = g_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy); 166 } 167 } 168 169 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p) 170 { 171 size_t region_idx; 172 173 /* 174 * Like tcg_splitwx_to_rw, with no assert. The pc may come from 175 * a signal handler over which the caller has no control. 176 */ 177 if (!in_code_gen_buffer(p)) { 178 p -= tcg_splitwx_diff; 179 if (!in_code_gen_buffer(p)) { 180 return NULL; 181 } 182 } 183 184 if (p < region.start_aligned) { 185 region_idx = 0; 186 } else { 187 ptrdiff_t offset = p - region.start_aligned; 188 189 if (offset > region.stride * (region.n - 1)) { 190 region_idx = region.n - 1; 191 } else { 192 region_idx = offset / region.stride; 193 } 194 } 195 return region_trees + region_idx * tree_size; 196 } 197 198 void tcg_tb_insert(TranslationBlock *tb) 199 { 200 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); 201 202 g_assert(rt != NULL); 203 qemu_mutex_lock(&rt->lock); 204 g_tree_insert(rt->tree, &tb->tc, tb); 205 qemu_mutex_unlock(&rt->lock); 206 } 207 208 void tcg_tb_remove(TranslationBlock *tb) 209 { 210 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); 211 212 g_assert(rt != NULL); 213 qemu_mutex_lock(&rt->lock); 214 g_tree_remove(rt->tree, &tb->tc); 215 qemu_mutex_unlock(&rt->lock); 216 } 217 218 /* 219 * Find the TB 'tb' such that 220 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size 221 * Return NULL if not found. 222 */ 223 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr) 224 { 225 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr); 226 TranslationBlock *tb; 227 struct tb_tc s = { .ptr = (void *)tc_ptr }; 228 229 if (rt == NULL) { 230 return NULL; 231 } 232 233 qemu_mutex_lock(&rt->lock); 234 tb = g_tree_lookup(rt->tree, &s); 235 qemu_mutex_unlock(&rt->lock); 236 return tb; 237 } 238 239 static void tcg_region_tree_lock_all(void) 240 { 241 size_t i; 242 243 for (i = 0; i < region.n; i++) { 244 struct tcg_region_tree *rt = region_trees + i * tree_size; 245 246 qemu_mutex_lock(&rt->lock); 247 } 248 } 249 250 static void tcg_region_tree_unlock_all(void) 251 { 252 size_t i; 253 254 for (i = 0; i < region.n; i++) { 255 struct tcg_region_tree *rt = region_trees + i * tree_size; 256 257 qemu_mutex_unlock(&rt->lock); 258 } 259 } 260 261 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data) 262 { 263 size_t i; 264 265 tcg_region_tree_lock_all(); 266 for (i = 0; i < region.n; i++) { 267 struct tcg_region_tree *rt = region_trees + i * tree_size; 268 269 g_tree_foreach(rt->tree, func, user_data); 270 } 271 tcg_region_tree_unlock_all(); 272 } 273 274 size_t tcg_nb_tbs(void) 275 { 276 size_t nb_tbs = 0; 277 size_t i; 278 279 tcg_region_tree_lock_all(); 280 for (i = 0; i < region.n; i++) { 281 struct tcg_region_tree *rt = region_trees + i * tree_size; 282 283 nb_tbs += g_tree_nnodes(rt->tree); 284 } 285 tcg_region_tree_unlock_all(); 286 return nb_tbs; 287 } 288 289 static void tcg_region_tree_reset_all(void) 290 { 291 size_t i; 292 293 tcg_region_tree_lock_all(); 294 for (i = 0; i < region.n; i++) { 295 struct tcg_region_tree *rt = region_trees + i * tree_size; 296 297 /* Increment the refcount first so that destroy acts as a reset */ 298 g_tree_ref(rt->tree); 299 g_tree_destroy(rt->tree); 300 } 301 tcg_region_tree_unlock_all(); 302 } 303 304 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend) 305 { 306 void *start, *end; 307 308 start = region.start_aligned + curr_region * region.stride; 309 end = start + region.size; 310 311 if (curr_region == 0) { 312 start = region.after_prologue; 313 } 314 /* The final region may have a few extra pages due to earlier rounding. */ 315 if (curr_region == region.n - 1) { 316 end = region.start_aligned + region.total_size; 317 } 318 319 *pstart = start; 320 *pend = end; 321 } 322 323 static void tcg_region_assign(TCGContext *s, size_t curr_region) 324 { 325 void *start, *end; 326 327 tcg_region_bounds(curr_region, &start, &end); 328 329 s->code_gen_buffer = start; 330 s->code_gen_ptr = start; 331 s->code_gen_buffer_size = end - start; 332 s->code_gen_highwater = end - TCG_HIGHWATER; 333 } 334 335 static bool tcg_region_alloc__locked(TCGContext *s) 336 { 337 if (region.current == region.n) { 338 return true; 339 } 340 tcg_region_assign(s, region.current); 341 region.current++; 342 return false; 343 } 344 345 /* 346 * Request a new region once the one in use has filled up. 347 * Returns true on error. 348 */ 349 bool tcg_region_alloc(TCGContext *s) 350 { 351 bool err; 352 /* read the region size now; alloc__locked will overwrite it on success */ 353 size_t size_full = s->code_gen_buffer_size; 354 355 qemu_mutex_lock(®ion.lock); 356 err = tcg_region_alloc__locked(s); 357 if (!err) { 358 region.agg_size_full += size_full - TCG_HIGHWATER; 359 } 360 qemu_mutex_unlock(®ion.lock); 361 return err; 362 } 363 364 /* 365 * Perform a context's first region allocation. 366 * This function does _not_ increment region.agg_size_full. 367 */ 368 static void tcg_region_initial_alloc__locked(TCGContext *s) 369 { 370 bool err = tcg_region_alloc__locked(s); 371 g_assert(!err); 372 } 373 374 void tcg_region_initial_alloc(TCGContext *s) 375 { 376 qemu_mutex_lock(®ion.lock); 377 tcg_region_initial_alloc__locked(s); 378 qemu_mutex_unlock(®ion.lock); 379 } 380 381 /* Call from a safe-work context */ 382 void tcg_region_reset_all(void) 383 { 384 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 385 unsigned int i; 386 387 qemu_mutex_lock(®ion.lock); 388 region.current = 0; 389 region.agg_size_full = 0; 390 391 for (i = 0; i < n_ctxs; i++) { 392 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 393 tcg_region_initial_alloc__locked(s); 394 } 395 qemu_mutex_unlock(®ion.lock); 396 397 tcg_region_tree_reset_all(); 398 } 399 400 static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus) 401 { 402 #ifdef CONFIG_USER_ONLY 403 return 1; 404 #else 405 size_t n_regions; 406 407 /* 408 * It is likely that some vCPUs will translate more code than others, 409 * so we first try to set more regions than max_cpus, with those regions 410 * being of reasonable size. If that's not possible we make do by evenly 411 * dividing the code_gen_buffer among the vCPUs. 412 */ 413 /* Use a single region if all we have is one vCPU thread */ 414 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) { 415 return 1; 416 } 417 418 /* 419 * Try to have more regions than max_cpus, with each region being >= 2 MB. 420 * If we can't, then just allocate one region per vCPU thread. 421 */ 422 n_regions = tb_size / (2 * MiB); 423 if (n_regions <= max_cpus) { 424 return max_cpus; 425 } 426 return MIN(n_regions, max_cpus * 8); 427 #endif 428 } 429 430 /* 431 * Minimum size of the code gen buffer. This number is randomly chosen, 432 * but not so small that we can't have a fair number of TB's live. 433 * 434 * Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h. 435 * Unless otherwise indicated, this is constrained by the range of 436 * direct branches on the host cpu, as used by the TCG implementation 437 * of goto_tb. 438 */ 439 #define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB) 440 441 #if TCG_TARGET_REG_BITS == 32 442 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB) 443 #ifdef CONFIG_USER_ONLY 444 /* 445 * For user mode on smaller 32 bit systems we may run into trouble 446 * allocating big chunks of data in the right place. On these systems 447 * we utilise a static code generation buffer directly in the binary. 448 */ 449 #define USE_STATIC_CODE_GEN_BUFFER 450 #endif 451 #else /* TCG_TARGET_REG_BITS == 64 */ 452 #ifdef CONFIG_USER_ONLY 453 /* 454 * As user-mode emulation typically means running multiple instances 455 * of the translator don't go too nuts with our default code gen 456 * buffer lest we make things too hard for the OS. 457 */ 458 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB) 459 #else 460 /* 461 * We expect most system emulation to run one or two guests per host. 462 * Users running large scale system emulation may want to tweak their 463 * runtime setup via the tb-size control on the command line. 464 */ 465 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB) 466 #endif 467 #endif 468 469 #define DEFAULT_CODE_GEN_BUFFER_SIZE \ 470 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ 471 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE) 472 473 #ifdef USE_STATIC_CODE_GEN_BUFFER 474 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] 475 __attribute__((aligned(CODE_GEN_ALIGN))); 476 477 static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp) 478 { 479 void *buf, *end; 480 size_t size; 481 482 if (splitwx > 0) { 483 error_setg(errp, "jit split-wx not supported"); 484 return -1; 485 } 486 487 /* page-align the beginning and end of the buffer */ 488 buf = static_code_gen_buffer; 489 end = static_code_gen_buffer + sizeof(static_code_gen_buffer); 490 buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size); 491 end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size); 492 493 size = end - buf; 494 495 /* Honor a command-line option limiting the size of the buffer. */ 496 if (size > tb_size) { 497 size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size); 498 } 499 500 region.start_aligned = buf; 501 region.total_size = size; 502 503 return PROT_READ | PROT_WRITE; 504 } 505 #elif defined(_WIN32) 506 static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) 507 { 508 void *buf; 509 510 if (splitwx > 0) { 511 error_setg(errp, "jit split-wx not supported"); 512 return -1; 513 } 514 515 buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, 516 PAGE_EXECUTE_READWRITE); 517 if (buf == NULL) { 518 error_setg_win32(errp, GetLastError(), 519 "allocate %zu bytes for jit buffer", size); 520 return false; 521 } 522 523 region.start_aligned = buf; 524 region.total_size = size; 525 526 return PAGE_READ | PAGE_WRITE | PAGE_EXEC; 527 } 528 #else 529 static int alloc_code_gen_buffer_anon(size_t size, int prot, 530 int flags, Error **errp) 531 { 532 void *buf; 533 534 buf = mmap(NULL, size, prot, flags, -1, 0); 535 if (buf == MAP_FAILED) { 536 error_setg_errno(errp, errno, 537 "allocate %zu bytes for jit buffer", size); 538 return -1; 539 } 540 541 region.start_aligned = buf; 542 region.total_size = size; 543 return prot; 544 } 545 546 #ifndef CONFIG_TCG_INTERPRETER 547 #ifdef CONFIG_POSIX 548 #include "qemu/memfd.h" 549 550 static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp) 551 { 552 void *buf_rw = NULL, *buf_rx = MAP_FAILED; 553 int fd = -1; 554 555 buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp); 556 if (buf_rw == NULL) { 557 goto fail; 558 } 559 560 buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); 561 if (buf_rx == MAP_FAILED) { 562 goto fail_rx; 563 } 564 565 close(fd); 566 region.start_aligned = buf_rw; 567 region.total_size = size; 568 tcg_splitwx_diff = buf_rx - buf_rw; 569 570 return PROT_READ | PROT_WRITE; 571 572 fail_rx: 573 error_setg_errno(errp, errno, "failed to map shared memory for execute"); 574 fail: 575 if (buf_rx != MAP_FAILED) { 576 munmap(buf_rx, size); 577 } 578 if (buf_rw) { 579 munmap(buf_rw, size); 580 } 581 if (fd >= 0) { 582 close(fd); 583 } 584 return -1; 585 } 586 #endif /* CONFIG_POSIX */ 587 588 #ifdef CONFIG_DARWIN 589 #include <mach/mach.h> 590 591 extern kern_return_t mach_vm_remap(vm_map_t target_task, 592 mach_vm_address_t *target_address, 593 mach_vm_size_t size, 594 mach_vm_offset_t mask, 595 int flags, 596 vm_map_t src_task, 597 mach_vm_address_t src_address, 598 boolean_t copy, 599 vm_prot_t *cur_protection, 600 vm_prot_t *max_protection, 601 vm_inherit_t inheritance); 602 603 static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp) 604 { 605 kern_return_t ret; 606 mach_vm_address_t buf_rw, buf_rx; 607 vm_prot_t cur_prot, max_prot; 608 609 /* Map the read-write portion via normal anon memory. */ 610 if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE, 611 MAP_PRIVATE | MAP_ANONYMOUS, errp)) { 612 return -1; 613 } 614 615 buf_rw = (mach_vm_address_t)region.start_aligned; 616 buf_rx = 0; 617 ret = mach_vm_remap(mach_task_self(), 618 &buf_rx, 619 size, 620 0, 621 VM_FLAGS_ANYWHERE, 622 mach_task_self(), 623 buf_rw, 624 false, 625 &cur_prot, 626 &max_prot, 627 VM_INHERIT_NONE); 628 if (ret != KERN_SUCCESS) { 629 /* TODO: Convert "ret" to a human readable error message. */ 630 error_setg(errp, "vm_remap for jit splitwx failed"); 631 munmap((void *)buf_rw, size); 632 return -1; 633 } 634 635 if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) { 636 error_setg_errno(errp, errno, "mprotect for jit splitwx"); 637 munmap((void *)buf_rx, size); 638 munmap((void *)buf_rw, size); 639 return -1; 640 } 641 642 tcg_splitwx_diff = buf_rx - buf_rw; 643 return PROT_READ | PROT_WRITE; 644 } 645 #endif /* CONFIG_DARWIN */ 646 #endif /* CONFIG_TCG_INTERPRETER */ 647 648 static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp) 649 { 650 #ifndef CONFIG_TCG_INTERPRETER 651 # ifdef CONFIG_DARWIN 652 return alloc_code_gen_buffer_splitwx_vmremap(size, errp); 653 # endif 654 # ifdef CONFIG_POSIX 655 return alloc_code_gen_buffer_splitwx_memfd(size, errp); 656 # endif 657 #endif 658 error_setg(errp, "jit split-wx not supported"); 659 return -1; 660 } 661 662 static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) 663 { 664 ERRP_GUARD(); 665 int prot, flags; 666 667 if (splitwx) { 668 prot = alloc_code_gen_buffer_splitwx(size, errp); 669 if (prot >= 0) { 670 return prot; 671 } 672 /* 673 * If splitwx force-on (1), fail; 674 * if splitwx default-on (-1), fall through to splitwx off. 675 */ 676 if (splitwx > 0) { 677 return -1; 678 } 679 error_free_or_abort(errp); 680 } 681 682 /* 683 * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect 684 * rejects a permission change from RWX -> NONE when reserving the 685 * guard pages later. We can go the other way with the same number 686 * of syscalls, so always begin with PROT_NONE. 687 */ 688 prot = PROT_NONE; 689 flags = MAP_PRIVATE | MAP_ANONYMOUS; 690 #ifdef CONFIG_DARWIN 691 /* Applicable to both iOS and macOS (Apple Silicon). */ 692 if (!splitwx) { 693 flags |= MAP_JIT; 694 } 695 #endif 696 697 return alloc_code_gen_buffer_anon(size, prot, flags, errp); 698 } 699 #endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */ 700 701 /* 702 * Initializes region partitioning. 703 * 704 * Called at init time from the parent thread (i.e. the one calling 705 * tcg_context_init), after the target's TCG globals have been set. 706 * 707 * Region partitioning works by splitting code_gen_buffer into separate regions, 708 * and then assigning regions to TCG threads so that the threads can translate 709 * code in parallel without synchronization. 710 * 711 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at 712 * least max_cpus regions in MTTCG. In !MTTCG we use a single region. 713 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...]) 714 * must have been parsed before calling this function, since it calls 715 * qemu_tcg_mttcg_enabled(). 716 * 717 * In user-mode we use a single region. Having multiple regions in user-mode 718 * is not supported, because the number of vCPU threads (recall that each thread 719 * spawned by the guest corresponds to a vCPU thread) is only bounded by the 720 * OS, and usually this number is huge (tens of thousands is not uncommon). 721 * Thus, given this large bound on the number of vCPU threads and the fact 722 * that code_gen_buffer is allocated at compile-time, we cannot guarantee 723 * that the availability of at least one region per vCPU thread. 724 * 725 * However, this user-mode limitation is unlikely to be a significant problem 726 * in practice. Multi-threaded guests share most if not all of their translated 727 * code, which makes parallel code generation less appealing than in softmmu. 728 */ 729 void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus) 730 { 731 const size_t page_size = qemu_real_host_page_size; 732 size_t region_size; 733 int have_prot, need_prot; 734 735 /* Size the buffer. */ 736 if (tb_size == 0) { 737 size_t phys_mem = qemu_get_host_physmem(); 738 if (phys_mem == 0) { 739 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE; 740 } else { 741 tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size); 742 tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size); 743 } 744 } 745 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) { 746 tb_size = MIN_CODE_GEN_BUFFER_SIZE; 747 } 748 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) { 749 tb_size = MAX_CODE_GEN_BUFFER_SIZE; 750 } 751 752 have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal); 753 assert(have_prot >= 0); 754 755 /* Request large pages for the buffer and the splitwx. */ 756 qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE); 757 if (tcg_splitwx_diff) { 758 qemu_madvise(region.start_aligned + tcg_splitwx_diff, 759 region.total_size, QEMU_MADV_HUGEPAGE); 760 } 761 762 /* 763 * Make region_size a multiple of page_size, using aligned as the start. 764 * As a result of this we might end up with a few extra pages at the end of 765 * the buffer; we will assign those to the last region. 766 */ 767 region.n = tcg_n_regions(tb_size, max_cpus); 768 region_size = tb_size / region.n; 769 region_size = QEMU_ALIGN_DOWN(region_size, page_size); 770 771 /* A region must have at least 2 pages; one code, one guard */ 772 g_assert(region_size >= 2 * page_size); 773 region.stride = region_size; 774 775 /* Reserve space for guard pages. */ 776 region.size = region_size - page_size; 777 region.total_size -= page_size; 778 779 /* 780 * The first region will be smaller than the others, via the prologue, 781 * which has yet to be allocated. For now, the first region begins at 782 * the page boundary. 783 */ 784 region.after_prologue = region.start_aligned; 785 786 /* init the region struct */ 787 qemu_mutex_init(®ion.lock); 788 789 /* 790 * Set guard pages in the rw buffer, as that's the one into which 791 * buffer overruns could occur. Do not set guard pages in the rx 792 * buffer -- let that one use hugepages throughout. 793 * Work with the page protections set up with the initial mapping. 794 */ 795 need_prot = PAGE_READ | PAGE_WRITE; 796 #ifndef CONFIG_TCG_INTERPRETER 797 if (tcg_splitwx_diff == 0) { 798 need_prot |= PAGE_EXEC; 799 } 800 #endif 801 for (size_t i = 0, n = region.n; i < n; i++) { 802 void *start, *end; 803 804 tcg_region_bounds(i, &start, &end); 805 if (have_prot != need_prot) { 806 int rc; 807 808 if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) { 809 rc = qemu_mprotect_rwx(start, end - start); 810 } else if (need_prot == (PAGE_READ | PAGE_WRITE)) { 811 rc = qemu_mprotect_rw(start, end - start); 812 } else { 813 g_assert_not_reached(); 814 } 815 if (rc) { 816 error_setg_errno(&error_fatal, errno, 817 "mprotect of jit buffer"); 818 } 819 } 820 if (have_prot != 0) { 821 /* Guard pages are nice for bug detection but are not essential. */ 822 (void)qemu_mprotect_none(end, page_size); 823 } 824 } 825 826 tcg_region_trees_init(); 827 828 /* 829 * Leave the initial context initialized to the first region. 830 * This will be the context into which we generate the prologue. 831 * It is also the only context for CONFIG_USER_ONLY. 832 */ 833 tcg_region_initial_alloc__locked(&tcg_init_ctx); 834 } 835 836 void tcg_region_prologue_set(TCGContext *s) 837 { 838 /* Deduct the prologue from the first region. */ 839 g_assert(region.start_aligned == s->code_gen_buffer); 840 region.after_prologue = s->code_ptr; 841 842 /* Recompute boundaries of the first region. */ 843 tcg_region_assign(s, 0); 844 845 /* Register the balance of the buffer with gdb. */ 846 tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue), 847 region.start_aligned + region.total_size - 848 region.after_prologue); 849 } 850 851 /* 852 * Returns the size (in bytes) of all translated code (i.e. from all regions) 853 * currently in the cache. 854 * See also: tcg_code_capacity() 855 * Do not confuse with tcg_current_code_size(); that one applies to a single 856 * TCG context. 857 */ 858 size_t tcg_code_size(void) 859 { 860 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 861 unsigned int i; 862 size_t total; 863 864 qemu_mutex_lock(®ion.lock); 865 total = region.agg_size_full; 866 for (i = 0; i < n_ctxs; i++) { 867 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 868 size_t size; 869 870 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer; 871 g_assert(size <= s->code_gen_buffer_size); 872 total += size; 873 } 874 qemu_mutex_unlock(®ion.lock); 875 return total; 876 } 877 878 /* 879 * Returns the code capacity (in bytes) of the entire cache, i.e. including all 880 * regions. 881 * See also: tcg_code_size() 882 */ 883 size_t tcg_code_capacity(void) 884 { 885 size_t guard_size, capacity; 886 887 /* no need for synchronization; these variables are set at init time */ 888 guard_size = region.stride - region.size; 889 capacity = region.total_size; 890 capacity -= (region.n - 1) * guard_size; 891 capacity -= region.n * TCG_HIGHWATER; 892 893 return capacity; 894 } 895