xref: /openbmc/qemu/tcg/region.c (revision 31afe04586efeccb80cc36ffafcd0e32a3245ffb)
1  /*
2   * Memory region management for Tiny Code Generator for QEMU
3   *
4   * Copyright (c) 2008 Fabrice Bellard
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a copy
7   * of this software and associated documentation files (the "Software"), to deal
8   * in the Software without restriction, including without limitation the rights
9   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10   * copies of the Software, and to permit persons to whom the Software is
11   * furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22   * THE SOFTWARE.
23   */
24  
25  #include "qemu/osdep.h"
26  #include "qemu/units.h"
27  #include "qemu/madvise.h"
28  #include "qemu/mprotect.h"
29  #include "qemu/memalign.h"
30  #include "qemu/cacheinfo.h"
31  #include "qemu/qtree.h"
32  #include "qapi/error.h"
33  #include "exec/exec-all.h"
34  #include "tcg/tcg.h"
35  #include "tcg-internal.h"
36  
37  
38  struct tcg_region_tree {
39      QemuMutex lock;
40      QTree *tree;
41      /* padding to avoid false sharing is computed at run-time */
42  };
43  
44  /*
45   * We divide code_gen_buffer into equally-sized "regions" that TCG threads
46   * dynamically allocate from as demand dictates. Given appropriate region
47   * sizing, this minimizes flushes even when some TCG threads generate a lot
48   * more code than others.
49   */
50  struct tcg_region_state {
51      QemuMutex lock;
52  
53      /* fields set at init time */
54      void *start_aligned;
55      void *after_prologue;
56      size_t n;
57      size_t size; /* size of one region */
58      size_t stride; /* .size + guard size */
59      size_t total_size; /* size of entire buffer, >= n * stride */
60  
61      /* fields protected by the lock */
62      size_t current; /* current region index */
63      size_t agg_size_full; /* aggregate size of full regions */
64  };
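
/*
 * Illustrative layout (hypothetical numbers, not defaults): a 256 MiB
 * buffer split into 16 regions on a host with 4 KiB pages gives
 *
 *     stride = 16 MiB             one region plus its trailing guard page
 *     size   = 16 MiB - 4 KiB     usable bytes per region
 *
 * so region i nominally spans [start_aligned + i * stride,
 * start_aligned + i * stride + size).  Region 0 actually begins at
 * after_prologue, and the last region absorbs any rounding slack at the
 * end of the buffer; see tcg_region_bounds().
 */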
65  
66  static struct tcg_region_state region;
67  
68  /*
69   * This is an array of struct tcg_region_tree's, with padding.
70   * We use void * to simplify the computation of region_trees[i]; each
71   * struct is found every tree_size bytes.
72   */
73  static void *region_trees;
74  static size_t tree_size;
75  
76  bool in_code_gen_buffer(const void *p)
77  {
78      /*
79       * Much like it is valid to have a pointer to the byte past the
80       * end of an array (so long as you don't dereference it), allow
81       * a pointer to the byte past the end of the code gen buffer.
82       */
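    /*
     * The single unsigned comparison also rejects pointers below the
     * buffer: there the difference is negative, and the cast to size_t
     * wraps it to a value far larger than total_size.
     */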
83      return (size_t)(p - region.start_aligned) <= region.total_size;
84  }
85  
86  #ifdef CONFIG_DEBUG_TCG
87  const void *tcg_splitwx_to_rx(void *rw)
88  {
89      /* Pass NULL pointers unchanged. */
90      if (rw) {
91          g_assert(in_code_gen_buffer(rw));
92          rw += tcg_splitwx_diff;
93      }
94      return rw;
95  }
96  
97  void *tcg_splitwx_to_rw(const void *rx)
98  {
99      /* Pass NULL pointers unchanged. */
100      if (rx) {
101          rx -= tcg_splitwx_diff;
102          /* Assert that we end with a pointer in the rw region. */
103          g_assert(in_code_gen_buffer(rx));
104      }
105      return (void *)rx;
106  }
107  #endif /* CONFIG_DEBUG_TCG */
108  
109  /* compare a pointer @ptr and a tb_tc @s */
110  static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
111  {
112      if (ptr >= s->ptr + s->size) {
113          return 1;
114      } else if (ptr < s->ptr) {
115          return -1;
116      }
117      return 0;
118  }
119  
120  static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
121  {
122      const struct tb_tc *a = ap;
123      const struct tb_tc *b = bp;
124  
125      /*
126       * When both sizes are set, we know this isn't a lookup.
127       * This is the most likely case: every TB must be inserted; lookups
128       * are a lot less frequent.
129       */
130      if (likely(a->size && b->size)) {
131          if (a->ptr > b->ptr) {
132              return 1;
133          } else if (a->ptr < b->ptr) {
134              return -1;
135          }
136          /* a->ptr == b->ptr should happen only on deletions */
137          g_assert(a->size == b->size);
138          return 0;
139      }
140      /*
141       * All lookups have one of the two .size fields set to 0.
142       * From the glib sources we see that @ap is always the lookup key. However
143       * the docs provide no guarantee, so we just mark this case as likely.
144       */
145      if (likely(a->size == 0)) {
146          return ptr_cmp_tb_tc(a->ptr, b);
147      }
148      return ptr_cmp_tb_tc(b->ptr, a);
149  }
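
/*
 * Concretely, the two key shapes this comparator sees are, schematically:
 *
 *     struct tb_tc insert_key = { .ptr = tb->tc.ptr, .size = tb->tc.size };
 *     struct tb_tc lookup_key = { .ptr = host_pc };      (.size stays 0)
 *
 * (host_pc is just an illustrative name for an address somewhere inside
 * generated code.)  Insertions compare two fully populated keys by
 * address; a zero-sized lookup key matches the stored key whose half-open
 * range [ptr, ptr + size) contains host_pc.
 */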
150  
151  static void tb_destroy(gpointer value)
152  {
153      TranslationBlock *tb = value;
154      qemu_spin_destroy(&tb->jmp_lock);
155  }
156  
157  static void tcg_region_trees_init(void)
158  {
159      size_t i;
160  
161      tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
162      region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
163      for (i = 0; i < region.n; i++) {
164          struct tcg_region_tree *rt = region_trees + i * tree_size;
165  
166          qemu_mutex_init(&rt->lock);
167          rt->tree = q_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
168      }
169  }
170  
171  static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
172  {
173      size_t region_idx;
174  
175      /*
176       * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
177       * a signal handler over which the caller has no control.
178       */
179      if (!in_code_gen_buffer(p)) {
180          p -= tcg_splitwx_diff;
181          if (!in_code_gen_buffer(p)) {
182              return NULL;
183          }
184      }
185  
186      if (p < region.start_aligned) {
187          region_idx = 0;
188      } else {
189          ptrdiff_t offset = p - region.start_aligned;
190  
191          if (offset > region.stride * (region.n - 1)) {
192              region_idx = region.n - 1;
193          } else {
194              region_idx = offset / region.stride;
195          }
196      }
197      return region_trees + region_idx * tree_size;
198  }
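
/*
 * Worked example (hypothetical numbers): with stride = 16 MiB and n = 8,
 * a pointer 40 MiB past start_aligned gives offset / stride = 2, i.e. the
 * third region's tree.  Offsets beyond stride * (n - 1) are clamped to
 * the last region, which also owns the rounding slack at the end of the
 * buffer.
 */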
199  
200  void tcg_tb_insert(TranslationBlock *tb)
201  {
202      struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
203  
204      g_assert(rt != NULL);
205      qemu_mutex_lock(&rt->lock);
206      q_tree_insert(rt->tree, &tb->tc, tb);
207      qemu_mutex_unlock(&rt->lock);
208  }
209  
210  void tcg_tb_remove(TranslationBlock *tb)
211  {
212      struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
213  
214      g_assert(rt != NULL);
215      qemu_mutex_lock(&rt->lock);
216      q_tree_remove(rt->tree, &tb->tc);
217      qemu_mutex_unlock(&rt->lock);
218  }
219  
220  /*
221   * Find the TB 'tb' such that
222   * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
223   * Return NULL if not found.
224   */
225  TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
226  {
227      struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
228      TranslationBlock *tb;
229      struct tb_tc s = { .ptr = (void *)tc_ptr };
230  
231      if (rt == NULL) {
232          return NULL;
233      }
234  
235      qemu_mutex_lock(&rt->lock);
236      tb = q_tree_lookup(rt->tree, &s);
237      qemu_mutex_unlock(&rt->lock);
238      return tb;
239  }
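
/*
 * A minimal usage sketch (caller and variable names are illustrative):
 * mapping a host code address, e.g. one recovered while unwinding out of
 * generated code, back to the TB that contains it:
 *
 *     uintptr_t host_pc = ...;      // an address inside generated code
 *     TranslationBlock *tb = tcg_tb_lookup(host_pc);
 *
 *     if (tb) {
 *         // tb->tc.ptr <= (void *)host_pc < tb->tc.ptr + tb->tc.size
 *     }
 */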
240  
241  static void tcg_region_tree_lock_all(void)
242  {
243      size_t i;
244  
245      for (i = 0; i < region.n; i++) {
246          struct tcg_region_tree *rt = region_trees + i * tree_size;
247  
248          qemu_mutex_lock(&rt->lock);
249      }
250  }
251  
252  static void tcg_region_tree_unlock_all(void)
253  {
254      size_t i;
255  
256      for (i = 0; i < region.n; i++) {
257          struct tcg_region_tree *rt = region_trees + i * tree_size;
258  
259          qemu_mutex_unlock(&rt->lock);
260      }
261  }
262  
263  void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
264  {
265      size_t i;
266  
267      tcg_region_tree_lock_all();
268      for (i = 0; i < region.n; i++) {
269          struct tcg_region_tree *rt = region_trees + i * tree_size;
270  
271          q_tree_foreach(rt->tree, func, user_data);
272      }
273      tcg_region_tree_unlock_all();
274  }
275  
276  size_t tcg_nb_tbs(void)
277  {
278      size_t nb_tbs = 0;
279      size_t i;
280  
281      tcg_region_tree_lock_all();
282      for (i = 0; i < region.n; i++) {
283          struct tcg_region_tree *rt = region_trees + i * tree_size;
284  
285          nb_tbs += q_tree_nnodes(rt->tree);
286      }
287      tcg_region_tree_unlock_all();
288      return nb_tbs;
289  }
290  
291  static void tcg_region_tree_reset_all(void)
292  {
293      size_t i;
294  
295      tcg_region_tree_lock_all();
296      for (i = 0; i < region.n; i++) {
297          struct tcg_region_tree *rt = region_trees + i * tree_size;
298  
299          /* Increment the refcount first so that destroy acts as a reset */
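        /*
         * q_tree_destroy() removes every node, running the tb_destroy
         * notifier on each value, while the extra reference keeps the
         * tree itself alive, so it comes back empty and ready for reuse.
         */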
300          q_tree_ref(rt->tree);
301          q_tree_destroy(rt->tree);
302      }
303      tcg_region_tree_unlock_all();
304  }
305  
306  static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
307  {
308      void *start, *end;
309  
310      start = region.start_aligned + curr_region * region.stride;
311      end = start + region.size;
312  
313      if (curr_region == 0) {
314          start = region.after_prologue;
315      }
316      /* The final region may have a few extra pages due to earlier rounding. */
317      if (curr_region == region.n - 1) {
318          end = region.start_aligned + region.total_size;
319      }
320  
321      *pstart = start;
322      *pend = end;
323  }
324  
325  static void tcg_region_assign(TCGContext *s, size_t curr_region)
326  {
327      void *start, *end;
328  
329      tcg_region_bounds(curr_region, &start, &end);
330  
331      s->code_gen_buffer = start;
332      s->code_gen_ptr = start;
333      s->code_gen_buffer_size = end - start;
334      s->code_gen_highwater = end - TCG_HIGHWATER;
335  }
336  
337  static bool tcg_region_alloc__locked(TCGContext *s)
338  {
339      if (region.current == region.n) {
340          return true;
341      }
342      tcg_region_assign(s, region.current);
343      region.current++;
344      return false;
345  }
346  
347  /*
348   * Request a new region once the one in use has filled up.
349   * Returns true on error.
350   */
351  bool tcg_region_alloc(TCGContext *s)
352  {
353      bool err;
354      /* read the region size now; alloc__locked will overwrite it on success */
355      size_t size_full = s->code_gen_buffer_size;
356  
357      qemu_mutex_lock(&region.lock);
358      err = tcg_region_alloc__locked(s);
359      if (!err) {
360          region.agg_size_full += size_full - TCG_HIGHWATER;
361      }
362      qemu_mutex_unlock(&region.lock);
363      return err;
364  }
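
/*
 * Rough sketch of the intended calling pattern (simplified; the real
 * consumer is the TB allocator in tcg.c): a translator thread that
 * crosses its code_gen_highwater grabs a fresh region and retries, and
 * only a true return here forces a full flush of translated code:
 *
 *     if (unlikely(next > s->code_gen_highwater)) {
 *         if (tcg_region_alloc(s)) {
 *             return NULL;    // out of regions: caller must tb_flush
 *         }
 *         // retry with the newly assigned region
 *     }
 */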
365  
366  /*
367   * Perform a context's first region allocation.
368   * This function does _not_ increment region.agg_size_full.
369   */
370  static void tcg_region_initial_alloc__locked(TCGContext *s)
371  {
372      bool err = tcg_region_alloc__locked(s);
373      g_assert(!err);
374  }
375  
376  void tcg_region_initial_alloc(TCGContext *s)
377  {
378      qemu_mutex_lock(&region.lock);
379      tcg_region_initial_alloc__locked(s);
380      qemu_mutex_unlock(&region.lock);
381  }
382  
383  /* Call from a safe-work context */
384  void tcg_region_reset_all(void)
385  {
386      unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
387      unsigned int i;
388  
389      qemu_mutex_lock(&region.lock);
390      region.current = 0;
391      region.agg_size_full = 0;
392  
393      for (i = 0; i < n_ctxs; i++) {
394          TCGContext *s = qatomic_read(&tcg_ctxs[i]);
395          tcg_region_initial_alloc__locked(s);
396      }
397      qemu_mutex_unlock(&region.lock);
398  
399      tcg_region_tree_reset_all();
400  }
401  
402  static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
403  {
404  #ifdef CONFIG_USER_ONLY
405      return 1;
406  #else
407      size_t n_regions;
408  
409      /*
410       * It is likely that some vCPUs will translate more code than others,
411       * so we first try to set more regions than max_cpus, with those regions
412       * being of reasonable size. If that's not possible we make do by evenly
413       * dividing the code_gen_buffer among the vCPUs.
414       */
415      /* Use a single region if all we have is one vCPU thread */
416      if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
417          return 1;
418      }
419  
420      /*
421       * Try to have more regions than max_cpus, with each region being >= 2 MB.
422       * If we can't, then just allocate one region per vCPU thread.
423       */
424      n_regions = tb_size / (2 * MiB);
425      if (n_regions <= max_cpus) {
426          return max_cpus;
427      }
428      return MIN(n_regions, max_cpus * 8);
429  #endif
430  }
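
/*
 * Worked examples (illustrative numbers): with tb_size = 1 GiB and
 * max_cpus = 8, tb_size / 2 MiB = 512 > 8, so we return MIN(512, 64) = 64
 * and tcg_region_init ends up with 64 regions of roughly 16 MiB each.
 * With tb_size = 32 MiB and max_cpus = 16, 32 / 2 = 16 <= 16, so we fall
 * back to one region per vCPU thread.
 */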
431  
432  /*
433   * Minimum size of the code gen buffer.  This number is arbitrarily chosen,
434   * but not so small that we can't have a fair number of TBs live.
435   *
436   * Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
437   * Unless otherwise indicated, this is constrained by the range of
438   * direct branches on the host cpu, as used by the TCG implementation
439   * of goto_tb.
440   */
441  #define MIN_CODE_GEN_BUFFER_SIZE     (1 * MiB)
442  
443  #if TCG_TARGET_REG_BITS == 32
444  #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
445  #ifdef CONFIG_USER_ONLY
446  /*
447   * For user mode on smaller 32-bit systems we may run into trouble
448   * allocating big chunks of data in the right place. On these systems
449   * we utilise a static code generation buffer directly in the binary.
450   */
451  #define USE_STATIC_CODE_GEN_BUFFER
452  #endif
453  #else /* TCG_TARGET_REG_BITS == 64 */
454  #ifdef CONFIG_USER_ONLY
455  /*
456   * As user-mode emulation typically means running multiple instances
457   * of the translator, don't go too nuts with our default code gen
458   * buffer lest we make things too hard for the OS.
459   */
460  #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
461  #else
462  /*
463   * We expect most system emulation to run one or two guests per host.
464   * Users running large scale system emulation may want to tweak their
465   * runtime setup via the tb-size control on the command line.
466   */
467  #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
468  #endif
469  #endif
470  
471  #define DEFAULT_CODE_GEN_BUFFER_SIZE \
472    (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
473     ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
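
/*
 * For example, if a host's tcg-target.h capped MAX_CODE_GEN_BUFFER_SIZE
 * at 128 MiB (a made-up figure for illustration), the 1 GiB softmmu
 * default above would collapse to 128 MiB; hosts with a larger limit
 * keep the full default.
 */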
474  
475  #ifdef USE_STATIC_CODE_GEN_BUFFER
476  static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
477      __attribute__((aligned(CODE_GEN_ALIGN)));
478  
479  static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
480  {
481      void *buf, *end;
482      size_t size;
483  
484      if (splitwx > 0) {
485          error_setg(errp, "jit split-wx not supported");
486          return -1;
487      }
488  
489      /* page-align the beginning and end of the buffer */
490      buf = static_code_gen_buffer;
491      end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
492      buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size());
493      end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size());
494  
495      size = end - buf;
496  
497      /* Honor a command-line option limiting the size of the buffer.  */
498      if (size > tb_size) {
499          size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size());
500      }
501  
502      region.start_aligned = buf;
503      region.total_size = size;
504  
505      return PROT_READ | PROT_WRITE;
506  }
507  #elif defined(_WIN32)
508  static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
509  {
510      void *buf;
511  
512      if (splitwx > 0) {
513          error_setg(errp, "jit split-wx not supported");
514          return -1;
515      }
516  
517      buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
518                               PAGE_EXECUTE_READWRITE);
519      if (buf == NULL) {
520          error_setg_win32(errp, GetLastError(),
521                           "allocate %zu bytes for jit buffer", size);
522          return -1;
523      }
524  
525      region.start_aligned = buf;
526      region.total_size = size;
527  
528      return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
529  }
530  #else
531  static int alloc_code_gen_buffer_anon(size_t size, int prot,
532                                        int flags, Error **errp)
533  {
534      void *buf;
535  
536      buf = mmap(NULL, size, prot, flags, -1, 0);
537      if (buf == MAP_FAILED) {
538          error_setg_errno(errp, errno,
539                           "allocate %zu bytes for jit buffer", size);
540          return -1;
541      }
542  
543      region.start_aligned = buf;
544      region.total_size = size;
545      return prot;
546  }
547  
548  #ifndef CONFIG_TCG_INTERPRETER
549  #ifdef CONFIG_POSIX
550  #include "qemu/memfd.h"
551  
552  static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
553  {
554      void *buf_rw = NULL, *buf_rx = MAP_FAILED;
555      int fd = -1;
556  
557      buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
558      if (buf_rw == NULL) {
559          goto fail;
560      }
561  
562      buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
563      if (buf_rx == MAP_FAILED) {
564          goto fail_rx;
565      }
566  
567      close(fd);
568      region.start_aligned = buf_rw;
569      region.total_size = size;
570      tcg_splitwx_diff = buf_rx - buf_rw;
571  
572      return PROT_READ | PROT_WRITE;
573  
574   fail_rx:
575      error_setg_errno(errp, errno, "failed to map shared memory for execute");
576   fail:
577      if (buf_rx != MAP_FAILED) {
578          munmap(buf_rx, size);
579      }
580      if (buf_rw) {
581          munmap(buf_rw, size);
582      }
583      if (fd >= 0) {
584          close(fd);
585      }
586      return -1;
587  }
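
/*
 * With this arrangement the same pages are mapped twice: generated code
 * is written through the RW view and executed through the RX view at a
 * fixed offset, so that, schematically,
 *
 *     rx_address == rw_address + tcg_splitwx_diff
 *
 * which is exactly the translation tcg_splitwx_to_rx() and
 * tcg_splitwx_to_rw() perform.
 */
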
588  #endif /* CONFIG_POSIX */
589  
590  #ifdef CONFIG_DARWIN
591  #include <mach/mach.h>
592  
593  extern kern_return_t mach_vm_remap(vm_map_t target_task,
594                                     mach_vm_address_t *target_address,
595                                     mach_vm_size_t size,
596                                     mach_vm_offset_t mask,
597                                     int flags,
598                                     vm_map_t src_task,
599                                     mach_vm_address_t src_address,
600                                     boolean_t copy,
601                                     vm_prot_t *cur_protection,
602                                     vm_prot_t *max_protection,
603                                     vm_inherit_t inheritance);
604  
605  static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
606  {
607      kern_return_t ret;
608      mach_vm_address_t buf_rw, buf_rx;
609      vm_prot_t cur_prot, max_prot;
610  
611      /* Map the read-write portion via normal anon memory. */
612      if (alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
613                                     MAP_PRIVATE | MAP_ANONYMOUS, errp) < 0) {
614          return -1;
615      }
616  
617      buf_rw = (mach_vm_address_t)region.start_aligned;
618      buf_rx = 0;
619      ret = mach_vm_remap(mach_task_self(),
620                          &buf_rx,
621                          size,
622                          0,
623                          VM_FLAGS_ANYWHERE,
624                          mach_task_self(),
625                          buf_rw,
626                          false,
627                          &cur_prot,
628                          &max_prot,
629                          VM_INHERIT_NONE);
630      if (ret != KERN_SUCCESS) {
631          /* TODO: Convert "ret" to a human readable error message. */
632          error_setg(errp, "vm_remap for jit splitwx failed");
633          munmap((void *)buf_rw, size);
634          return -1;
635      }
636  
637      if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
638          error_setg_errno(errp, errno, "mprotect for jit splitwx");
639          munmap((void *)buf_rx, size);
640          munmap((void *)buf_rw, size);
641          return -1;
642      }
643  
644      tcg_splitwx_diff = buf_rx - buf_rw;
645      return PROT_READ | PROT_WRITE;
646  }
647  #endif /* CONFIG_DARWIN */
648  #endif /* CONFIG_TCG_INTERPRETER */
649  
650  static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
651  {
652  #ifndef CONFIG_TCG_INTERPRETER
653  # ifdef CONFIG_DARWIN
654      return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
655  # endif
656  # ifdef CONFIG_POSIX
657      return alloc_code_gen_buffer_splitwx_memfd(size, errp);
658  # endif
659  #endif
660      error_setg(errp, "jit split-wx not supported");
661      return -1;
662  }
663  
664  static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
665  {
666      ERRP_GUARD();
667      int prot, flags;
668  
669      if (splitwx) {
670          prot = alloc_code_gen_buffer_splitwx(size, errp);
671          if (prot >= 0) {
672              return prot;
673          }
674          /*
675           * If splitwx force-on (1), fail;
676           * if splitwx default-on (-1), fall through to splitwx off.
677           */
678          if (splitwx > 0) {
679              return -1;
680          }
681          error_free_or_abort(errp);
682      }
683  
684      /*
685       * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
686       * rejects a permission change from RWX -> NONE when reserving the
687       * guard pages later.  We can go the other way with the same number
688       * of syscalls, so always begin with PROT_NONE.
689       */
690      prot = PROT_NONE;
691      flags = MAP_PRIVATE | MAP_ANONYMOUS;
692  #ifdef CONFIG_DARWIN
693      /* Applicable to both iOS and macOS (Apple Silicon). */
694      if (!splitwx) {
695          flags |= MAP_JIT;
696      }
697  #endif
698  
699      return alloc_code_gen_buffer_anon(size, prot, flags, errp);
700  }
701  #endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
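
/*
 * Whichever variant is compiled in, the splitwx argument is tri-state:
 * > 0 means a split RW/RX mapping is required (fail if it cannot be set
 * up), 0 means never use one, and < 0 means use it when the platform
 * supports it and quietly fall back to a single mapping otherwise.
 */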
702  
703  /*
704   * Initializes region partitioning.
705   *
706   * Called at init time from the parent thread (i.e. the one calling
707   * tcg_context_init), after the target's TCG globals have been set.
708   *
709   * Region partitioning works by splitting code_gen_buffer into separate regions,
710   * and then assigning regions to TCG threads so that the threads can translate
711   * code in parallel without synchronization.
712   *
713   * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
714   * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
715   * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
716   * must have been parsed before calling this function, since it calls
717   * qemu_tcg_mttcg_enabled().
718   *
719   * In user-mode we use a single region.  Having multiple regions in user-mode
720   * is not supported, because the number of vCPU threads (recall that each thread
721   * spawned by the guest corresponds to a vCPU thread) is only bounded by the
722   * OS, and usually this number is huge (tens of thousands is not uncommon).
723   * Thus, given this large bound on the number of vCPU threads and the fact
724   * that code_gen_buffer is allocated at compile-time, we cannot guarantee
725   * the availability of at least one region per vCPU thread.
726   *
727   * However, this user-mode limitation is unlikely to be a significant problem
728   * in practice. Multi-threaded guests share most if not all of their translated
729   * code, which makes parallel code generation less appealing than in softmmu.
730   */
731  void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
732  {
733      const size_t page_size = qemu_real_host_page_size();
734      size_t region_size;
735      int have_prot, need_prot;
736  
737      /* Size the buffer.  */
738      if (tb_size == 0) {
739          size_t phys_mem = qemu_get_host_physmem();
740          if (phys_mem == 0) {
741              tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
742          } else {
743              tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
744              tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
745          }
746      }
747      if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
748          tb_size = MIN_CODE_GEN_BUFFER_SIZE;
749      }
750      if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
751          tb_size = MAX_CODE_GEN_BUFFER_SIZE;
752      }
753  
754      have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
755      assert(have_prot >= 0);
756  
757      /* Request large pages for the buffer and the splitwx.  */
758      qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
759      if (tcg_splitwx_diff) {
760          qemu_madvise(region.start_aligned + tcg_splitwx_diff,
761                       region.total_size, QEMU_MADV_HUGEPAGE);
762      }
763  
764      /*
765       * Make region_size a multiple of page_size, using aligned as the start.
766       * As a result of this we might end up with a few extra pages at the end of
767       * the buffer; we will assign those to the last region.
768       */
769      region.n = tcg_n_regions(tb_size, max_cpus);
770      region_size = tb_size / region.n;
771      region_size = QEMU_ALIGN_DOWN(region_size, page_size);
772  
773      /* A region must have at least 2 pages; one code, one guard */
774      g_assert(region_size >= 2 * page_size);
775      region.stride = region_size;
776  
777      /* Reserve space for guard pages. */
778      region.size = region_size - page_size;
779      region.total_size -= page_size;
780  
781      /*
782       * The first region will be smaller than the others because of the
783       * prologue, which has yet to be allocated.  For now, the first region
784       * begins at the aligned start of the buffer.
785       */
786      region.after_prologue = region.start_aligned;
787  
788      /* init the region struct */
789      qemu_mutex_init(&region.lock);
790  
791      /*
792       * Set guard pages in the rw buffer, as that's the one into which
793       * buffer overruns could occur.  Do not set guard pages in the rx
794       * buffer -- let that one use hugepages throughout.
795       * Work with the page protections set up with the initial mapping.
796       */
797      need_prot = PAGE_READ | PAGE_WRITE;
798  #ifndef CONFIG_TCG_INTERPRETER
799      if (tcg_splitwx_diff == 0) {
800          need_prot |= PAGE_EXEC;
801      }
802  #endif
803      for (size_t i = 0, n = region.n; i < n; i++) {
804          void *start, *end;
805  
806          tcg_region_bounds(i, &start, &end);
807          if (have_prot != need_prot) {
808              int rc;
809  
810              if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
811                  rc = qemu_mprotect_rwx(start, end - start);
812              } else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
813                  rc = qemu_mprotect_rw(start, end - start);
814              } else {
815                  g_assert_not_reached();
816              }
817              if (rc) {
818                  error_setg_errno(&error_fatal, errno,
819                                   "mprotect of jit buffer");
820              }
821          }
822          if (have_prot != 0) {
823              /* Guard pages are nice for bug detection but are not essential. */
824              (void)qemu_mprotect_none(end, page_size);
825          }
826      }
827  
828      tcg_region_trees_init();
829  
830      /*
831       * Leave the initial context initialized to the first region.
832       * This will be the context into which we generate the prologue.
833       * It is also the only context for CONFIG_USER_ONLY.
834       */
835      tcg_region_initial_alloc__locked(&tcg_init_ctx);
836  }
837  
838  void tcg_region_prologue_set(TCGContext *s)
839  {
840      /* Deduct the prologue from the first region.  */
841      g_assert(region.start_aligned == s->code_gen_buffer);
842      region.after_prologue = s->code_ptr;
843  
844      /* Recompute boundaries of the first region. */
845      tcg_region_assign(s, 0);
846  
847      /* Register the balance of the buffer with gdb. */
848      tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
849                       region.start_aligned + region.total_size -
850                       region.after_prologue);
851  }
852  
853  /*
854   * Returns the size (in bytes) of all translated code (i.e. from all regions)
855   * currently in the cache.
856   * See also: tcg_code_capacity()
857   * Do not confuse with tcg_current_code_size(); that one applies to a single
858   * TCG context.
859   */
860  size_t tcg_code_size(void)
861  {
862      unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
863      unsigned int i;
864      size_t total;
865  
866      qemu_mutex_lock(&region.lock);
867      total = region.agg_size_full;
868      for (i = 0; i < n_ctxs; i++) {
869          const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
870          size_t size;
871  
872          size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
873          g_assert(size <= s->code_gen_buffer_size);
874          total += size;
875      }
876      qemu_mutex_unlock(&region.lock);
877      return total;
878  }
879  
880  /*
881   * Returns the code capacity (in bytes) of the entire cache, i.e. including all
882   * regions.
883   * See also: tcg_code_size()
884   */
885  size_t tcg_code_capacity(void)
886  {
887      size_t guard_size, capacity;
888  
889      /* no need for synchronization; these variables are set at init time */
890      guard_size = region.stride - region.size;
891      capacity = region.total_size;
892      capacity -= (region.n - 1) * guard_size;
893      capacity -= region.n * TCG_HIGHWATER;
894  
895      return capacity;
896  }
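
/*
 * Worked example (hypothetical numbers): with total_size just under
 * 256 MiB carved into n = 16 regions with 4 KiB guard pages,
 *
 *     capacity = total_size - 15 * 4 KiB - 16 * TCG_HIGHWATER
 *
 * i.e. neither the inter-region guard pages nor the per-region highwater
 * reserve count as usable code space.
 */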
897