/*
 * Memory region management for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/madvise.h"
#include "qemu/mprotect.h"
#include "qemu/memalign.h"
#include "qemu/cacheinfo.h"
#include "qapi/error.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg-internal.h"


struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start_aligned;
    void *after_prologue;
    size_t n;
    size_t size;       /* size of one region */
    size_t stride;     /* .size + guard size */
    size_t total_size; /* size of entire buffer, >= n * stride */

    /* fields protected by the lock */
    size_t current;       /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;

/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;

bool in_code_gen_buffer(const void *p)
{
    /*
     * Much like it is valid to have a pointer to the byte past the
     * end of an array (so long as you don't dereference it), allow
     * a pointer to the byte past the end of the code gen buffer.
     */
    return (size_t)(p - region.start_aligned) <= region.total_size;
}

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
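
/*
 * Each region keeps its own GTree of TranslationBlocks, keyed by the tb_tc
 * descriptor (host code pointer and size).  A TB is filed under the tree of
 * the region that contains its code, so TCG threads inserting into different
 * regions contend on different locks.
 */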

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have one of the .size fields set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tb_destroy(gpointer value)
{
    TranslationBlock *tb = value;
    qemu_spin_destroy(&tb->jmp_lock);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert. The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.after_prologue;
    }
    /* The final region may have a few extra pages due to earlier rounding. */
    if (curr_region == region.n - 1) {
        end = region.start_aligned + region.total_size;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static void tcg_region_initial_alloc__locked(TCGContext *s)
{
    bool err = tcg_region_alloc__locked(s);
    g_assert(!err);
}

void tcg_region_initial_alloc(TCGContext *s)
{
    qemu_mutex_lock(&region.lock);
    tcg_region_initial_alloc__locked(s);
    qemu_mutex_unlock(&region.lock);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        tcg_region_initial_alloc__locked(s);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
{
#ifdef CONFIG_USER_ONLY
    return 1;
#else
    size_t n_regions;

    /*
     * It is likely that some vCPUs will translate more code than others,
     * so we first try to set more regions than max_cpus, with those regions
     * being of reasonable size. If that's not possible we make do by evenly
     * dividing the code_gen_buffer among the vCPUs.
     */
    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /*
     * Try to have more regions than max_cpus, with each region being >= 2 MB.
     * If we can't, then just allocate one region per vCPU thread.
     */
    n_regions = tb_size / (2 * MiB);
    if (n_regions <= max_cpus) {
        return max_cpus;
    }
    return MIN(n_regions, max_cpus * 8);
#endif
}

/*
 * Minimum size of the code gen buffer. This number is randomly chosen,
 * but not so small that we can't have a fair number of TB's live.
 *
 * Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
 * Unless otherwise indicated, this is constrained by the range of
 * direct branches on the host cpu, as used by the TCG implementation
 * of goto_tb.
 */
#define MIN_CODE_GEN_BUFFER_SIZE     (1 * MiB)

#if TCG_TARGET_REG_BITS == 32
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
#ifdef CONFIG_USER_ONLY
/*
 * For user mode on smaller 32 bit systems we may run into trouble
 * allocating big chunks of data in the right place. On these systems
 * we utilise a static code generation buffer directly in the binary.
 */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
#else /* TCG_TARGET_REG_BITS == 64 */
#ifdef CONFIG_USER_ONLY
/*
 * As user-mode emulation typically means running multiple instances
 * of the translator don't go too nuts with our default code gen
 * buffer lest we make things too hard for the OS.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
#else
/*
 * We expect most system emulation to run one or two guests per host.
 * Users running large scale system emulation may want to tweak their
 * runtime setup via the tb-size control on the command line.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
#endif
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE \
  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
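
/*
 * Three implementations of alloc_code_gen_buffer follow: a static buffer
 * inside the binary for 32-bit user-only builds, VirtualAlloc for Windows,
 * and mmap-based anonymous or split-wx mappings for other POSIX hosts.
 * Each returns the page protection flags of the mapping, or -1 on error.
 */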

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
    __attribute__((aligned(CODE_GEN_ALIGN)));

static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
    void *buf, *end;
    size_t size;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    /* page-align the beginning and end of the buffer */
    buf = static_code_gen_buffer;
    end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size());
    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size());

    size = end - buf;

    /* Honor a command-line option limiting the size of the buffer. */
    if (size > tb_size) {
        size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size());
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PROT_READ | PROT_WRITE;
}
#elif defined(_WIN32)
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    void *buf;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                       PAGE_EXECUTE_READWRITE);
    if (buf == NULL) {
        error_setg_win32(errp, GetLastError(),
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
}
#else
static int alloc_code_gen_buffer_anon(size_t size, int prot,
                                      int flags, Error **errp)
{
    void *buf;

    buf = mmap(NULL, size, prot, flags, -1, 0);
    if (buf == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;
    return prot;
}

#ifndef CONFIG_TCG_INTERPRETER
#ifdef CONFIG_POSIX
#include "qemu/memfd.h"

static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *buf_rw = NULL, *buf_rx = MAP_FAILED;
    int fd = -1;

    buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
    if (buf_rw == NULL) {
        goto fail;
    }

    buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    if (buf_rx == MAP_FAILED) {
        goto fail_rx;
    }

    close(fd);
    region.start_aligned = buf_rw;
    region.total_size = size;
    tcg_splitwx_diff = buf_rx - buf_rw;

    return PROT_READ | PROT_WRITE;

 fail_rx:
    error_setg_errno(errp, errno, "failed to map shared memory for execute");
 fail:
    if (buf_rx != MAP_FAILED) {
        munmap(buf_rx, size);
    }
    if (buf_rw) {
        munmap(buf_rw, size);
    }
    if (fd >= 0) {
        close(fd);
    }
    return -1;
}
#endif /* CONFIG_POSIX */

#ifdef CONFIG_DARWIN
#include <mach/mach.h>

extern kern_return_t mach_vm_remap(vm_map_t target_task,
                                   mach_vm_address_t *target_address,
                                   mach_vm_size_t size,
                                   mach_vm_offset_t mask,
                                   int flags,
                                   vm_map_t src_task,
                                   mach_vm_address_t src_address,
                                   boolean_t copy,
                                   vm_prot_t *cur_protection,
                                   vm_prot_t *max_protection,
                                   vm_inherit_t inheritance);
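
/*
 * Create the read-execute view of the buffer by asking the kernel to remap
 * the anonymous read-write mapping at a second address within our own task,
 * then change the protection of the new mapping to read-execute.
 */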
static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
    kern_return_t ret;
    mach_vm_address_t buf_rw, buf_rx;
    vm_prot_t cur_prot, max_prot;

    /* Map the read-write portion via normal anon memory. */
    if (alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                   MAP_PRIVATE | MAP_ANONYMOUS, errp) < 0) {
        return -1;
    }

    buf_rw = (mach_vm_address_t)region.start_aligned;
    buf_rx = 0;
    ret = mach_vm_remap(mach_task_self(),
                        &buf_rx,
                        size,
                        0,
                        VM_FLAGS_ANYWHERE,
                        mach_task_self(),
                        buf_rw,
                        false,
                        &cur_prot,
                        &max_prot,
                        VM_INHERIT_NONE);
    if (ret != KERN_SUCCESS) {
        /* TODO: Convert "ret" to a human readable error message. */
        error_setg(errp, "vm_remap for jit splitwx failed");
        munmap((void *)buf_rw, size);
        return -1;
    }

    if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
        error_setg_errno(errp, errno, "mprotect for jit splitwx");
        munmap((void *)buf_rx, size);
        munmap((void *)buf_rw, size);
        return -1;
    }

    tcg_splitwx_diff = buf_rx - buf_rw;
    return PROT_READ | PROT_WRITE;
}
#endif /* CONFIG_DARWIN */
#endif /* CONFIG_TCG_INTERPRETER */

static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#ifndef CONFIG_TCG_INTERPRETER
# ifdef CONFIG_DARWIN
    return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
# endif
# ifdef CONFIG_POSIX
    return alloc_code_gen_buffer_splitwx_memfd(size, errp);
# endif
#endif
    error_setg(errp, "jit split-wx not supported");
    return -1;
}

static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    ERRP_GUARD();
    int prot, flags;

    if (splitwx) {
        prot = alloc_code_gen_buffer_splitwx(size, errp);
        if (prot >= 0) {
            return prot;
        }
        /*
         * If splitwx force-on (1), fail;
         * if splitwx default-on (-1), fall through to splitwx off.
         */
        if (splitwx > 0) {
            return -1;
        }
        error_free_or_abort(errp);
    }

    /*
     * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
     * rejects a permission change from RWX -> NONE when reserving the
     * guard pages later. We can go the other way with the same number
     * of syscalls, so always begin with PROT_NONE.
     */
    prot = PROT_NONE;
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef CONFIG_DARWIN
    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (!splitwx) {
        flags |= MAP_JIT;
    }
#endif

    return alloc_code_gen_buffer_anon(size, prot, flags, errp);
}
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    const size_t page_size = qemu_real_host_page_size();
    size_t region_size;
    int have_prot, need_prot;

    /* Size the buffer. */
    if (tb_size == 0) {
        size_t phys_mem = qemu_get_host_physmem();
        if (phys_mem == 0) {
            tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
        } else {
            tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
            tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
        }
    }
    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
    }
    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
    }

    have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
    assert(have_prot >= 0);

    /* Request large pages for the buffer and the splitwx. */
    qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
    if (tcg_splitwx_diff) {
        qemu_madvise(region.start_aligned + tcg_splitwx_diff,
                     region.total_size, QEMU_MADV_HUGEPAGE);
    }

    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region.n = tcg_n_regions(tb_size, max_cpus);
    region_size = tb_size / region.n;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);
    region.stride = region_size;

    /* Reserve space for guard pages. */
    region.size = region_size - page_size;
    region.total_size -= page_size;

    /*
     * The first region will be smaller than the others, via the prologue,
     * which has yet to be allocated. For now, the first region begins at
     * the page boundary.
     */
    region.after_prologue = region.start_aligned;

    /* init the region struct */
    qemu_mutex_init(&region.lock);

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur. Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     * Work with the page protections set up with the initial mapping.
     */
    need_prot = PAGE_READ | PAGE_WRITE;
#ifndef CONFIG_TCG_INTERPRETER
    if (tcg_splitwx_diff == 0) {
        need_prot |= PAGE_EXEC;
    }
#endif
    for (size_t i = 0, n = region.n; i < n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);
        if (have_prot != need_prot) {
            int rc;

            if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
                rc = qemu_mprotect_rwx(start, end - start);
            } else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
                rc = qemu_mprotect_rw(start, end - start);
            } else {
                g_assert_not_reached();
            }
            if (rc) {
                error_setg_errno(&error_fatal, errno,
                                 "mprotect of jit buffer");
            }
        }
        if (have_prot != 0) {
            /* Guard pages are nice for bug detection but are not essential. */
            (void)qemu_mprotect_none(end, page_size);
        }
    }

    tcg_region_trees_init();

    /*
     * Leave the initial context initialized to the first region.
     * This will be the context into which we generate the prologue.
     * It is also the only context for CONFIG_USER_ONLY.
     */
    tcg_region_initial_alloc__locked(&tcg_init_ctx);
}
795 */ 796 need_prot = PAGE_READ | PAGE_WRITE; 797 #ifndef CONFIG_TCG_INTERPRETER 798 if (tcg_splitwx_diff == 0) { 799 need_prot |= PAGE_EXEC; 800 } 801 #endif 802 for (size_t i = 0, n = region.n; i < n; i++) { 803 void *start, *end; 804 805 tcg_region_bounds(i, &start, &end); 806 if (have_prot != need_prot) { 807 int rc; 808 809 if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) { 810 rc = qemu_mprotect_rwx(start, end - start); 811 } else if (need_prot == (PAGE_READ | PAGE_WRITE)) { 812 rc = qemu_mprotect_rw(start, end - start); 813 } else { 814 g_assert_not_reached(); 815 } 816 if (rc) { 817 error_setg_errno(&error_fatal, errno, 818 "mprotect of jit buffer"); 819 } 820 } 821 if (have_prot != 0) { 822 /* Guard pages are nice for bug detection but are not essential. */ 823 (void)qemu_mprotect_none(end, page_size); 824 } 825 } 826 827 tcg_region_trees_init(); 828 829 /* 830 * Leave the initial context initialized to the first region. 831 * This will be the context into which we generate the prologue. 832 * It is also the only context for CONFIG_USER_ONLY. 833 */ 834 tcg_region_initial_alloc__locked(&tcg_init_ctx); 835 } 836 837 void tcg_region_prologue_set(TCGContext *s) 838 { 839 /* Deduct the prologue from the first region. */ 840 g_assert(region.start_aligned == s->code_gen_buffer); 841 region.after_prologue = s->code_ptr; 842 843 /* Recompute boundaries of the first region. */ 844 tcg_region_assign(s, 0); 845 846 /* Register the balance of the buffer with gdb. */ 847 tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue), 848 region.start_aligned + region.total_size - 849 region.after_prologue); 850 } 851 852 /* 853 * Returns the size (in bytes) of all translated code (i.e. from all regions) 854 * currently in the cache. 855 * See also: tcg_code_capacity() 856 * Do not confuse with tcg_current_code_size(); that one applies to a single 857 * TCG context. 858 */ 859 size_t tcg_code_size(void) 860 { 861 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 862 unsigned int i; 863 size_t total; 864 865 qemu_mutex_lock(®ion.lock); 866 total = region.agg_size_full; 867 for (i = 0; i < n_ctxs; i++) { 868 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 869 size_t size; 870 871 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer; 872 g_assert(size <= s->code_gen_buffer_size); 873 total += size; 874 } 875 qemu_mutex_unlock(®ion.lock); 876 return total; 877 } 878 879 /* 880 * Returns the code capacity (in bytes) of the entire cache, i.e. including all 881 * regions. 882 * See also: tcg_code_size() 883 */ 884 size_t tcg_code_capacity(void) 885 { 886 size_t guard_size, capacity; 887 888 /* no need for synchronization; these variables are set at init time */ 889 guard_size = region.stride - region.size; 890 capacity = region.total_size; 891 capacity -= (region.n - 1) * guard_size; 892 capacity -= region.n * TCG_HIGHWATER; 893 894 return capacity; 895 } 896