tcg/region.c

   1 /*
   2  * Memory region management for Tiny Code Generator for QEMU
   3  *
   4  * Copyright (c) 2008 Fabrice Bellard
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to deal
   8  * in the Software without restriction, including without limitation the rights
   9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10  * copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in
  14  * all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22  * THE SOFTWARE.
  23  */
  24
  25 #include "qemu/osdep.h"
  26 #include "qemu/units.h"
  27 #include "qemu/madvise.h"
  28 #include "qemu/mprotect.h"
  29 #include "qemu/memalign.h"
  30 #include "qemu/cacheinfo.h"
  31 #include "qemu/qtree.h"
  32 #include "qapi/error.h"
  33 #include "tcg/tcg.h"
  34 #include "exec/translation-block.h"
  35 #include "tcg-internal.h"
  36
  37
/*
 * One TB lookup tree together with the mutex that guards it.
 * tcg_region_trees_init() creates region.n of these, one per region.
 */
struct tcg_region_tree {
    /* Held around every insert/remove/lookup/traversal of @tree. */
    QemuMutex lock;
    /* Keyed by &tb->tc, valued by the TranslationBlock; ordered by tb_tc_cmp. */
    QTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
  43
  44 /*
  45  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
  46  * dynamically allocate from as demand dictates. Given appropriate region
  47  * sizing, this minimizes flushes even when some TCG threads generate a lot
  48  * more code than others.
  49  */
struct tcg_region_state {
    /* Protects the "current" and "agg_size_full" fields below. */
    QemuMutex lock;

    /* fields set at init time */
    /* Base address of the buffer, as recorded by alloc_code_gen_buffer*(). */
    void *start_aligned;
    /*
     * Where usable space in region 0 begins: equal to start_aligned until
     * tcg_region_prologue_set() moves it past the prologue.
     */
    void *after_prologue;
    /* Number of regions the buffer is divided into. */
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */
    /* NOTE: tcg_region_init() deducts one page from this after allocation. */
    size_t total_size; /* size of entire buffer, >= n * stride */

    /* fields protected by the lock */
    /* Index of the next region to hand out; equals n once all are taken. */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
  65
  66 static struct tcg_region_state region;
  67
  68 /*
  69  * This is an array of struct tcg_region_tree's, with padding.
  70  * We use void * to simplify the computation of region_trees[i]; each
  71  * struct is found every tree_size bytes.
  72  */
  73 static void *region_trees;
  74 static size_t tree_size;
  75
  76 bool in_code_gen_buffer(const void *p)
  77 {
  78     /*
  79      * Much like it is valid to have a pointer to the byte past the
  80      * end of an array (so long as you don't dereference it), allow
  81      * a pointer to the byte past the end of the code gen buffer.
  82      */
  83     return (size_t)(p - region.start_aligned) <= region.total_size;
  84 }
  85
  86 #ifdef CONFIG_DEBUG_TCG
  87 const void *tcg_splitwx_to_rx(void *rw)
  88 {
  89     /* Pass NULL pointers unchanged. */
  90     if (rw) {
  91         g_assert(in_code_gen_buffer(rw));
  92         rw += tcg_splitwx_diff;
  93     }
  94     return rw;
  95 }
  96
  97 void *tcg_splitwx_to_rw(const void *rx)
  98 {
  99     /* Pass NULL pointers unchanged. */
 100     if (rx) {
 101         rx -= tcg_splitwx_diff;
 102         /* Assert that we end with a pointer in the rw region. */
 103         g_assert(in_code_gen_buffer(rx));
 104     }
 105     return (void *)rx;
 106 }
 107 #endif /* CONFIG_DEBUG_TCG */
 108
 109 /* compare a pointer @ptr and a tb_tc @s */
 110 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
 111 {
 112     if (ptr >= s->ptr + s->size) {
 113         return 1;
 114     } else if (ptr < s->ptr) {
 115         return -1;
 116     }
 117     return 0;
 118 }
 119
 120 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
 121 {
 122     const struct tb_tc *a = ap;
 123     const struct tb_tc *b = bp;
 124
 125     /*
 126      * When both sizes are set, we know this isn't a lookup.
 127      * This is the most likely case: every TB must be inserted; lookups
 128      * are a lot less frequent.
 129      */
 130     if (likely(a->size && b->size)) {
 131         if (a->ptr > b->ptr) {
 132             return 1;
 133         } else if (a->ptr < b->ptr) {
 134             return -1;
 135         }
 136         /* a->ptr == b->ptr should happen only on deletions */
 137         g_assert(a->size == b->size);
 138         return 0;
 139     }
 140     /*
 141      * All lookups have either .size field set to 0.
 142      * From the glib sources we see that @ap is always the lookup key. However
 143      * the docs provide no guarantee, so we just mark this case as likely.
 144      */
 145     if (likely(a->size == 0)) {
 146         return ptr_cmp_tb_tc(a->ptr, b);
 147     }
 148     return ptr_cmp_tb_tc(b->ptr, a);
 149 }
 150
 151 static void tb_destroy(gpointer value)
 152 {
 153     TranslationBlock *tb = value;
 154     qemu_spin_destroy(&tb->jmp_lock);
 155 }
 156
 157 static void tcg_region_trees_init(void)
 158 {
 159     size_t i;
 160
 161     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
 162     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
 163     for (i = 0; i < region.n; i++) {
 164         struct tcg_region_tree *rt = region_trees + i * tree_size;
 165
 166         qemu_mutex_init(&rt->lock);
 167         rt->tree = q_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
 168     }
 169 }
 170
 171 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
 172 {
 173     size_t region_idx;
 174
 175     /*
 176      * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
 177      * a signal handler over which the caller has no control.
 178      */
 179     if (!in_code_gen_buffer(p)) {
 180         p -= tcg_splitwx_diff;
 181         if (!in_code_gen_buffer(p)) {
 182             return NULL;
 183         }
 184     }
 185
 186     if (p < region.start_aligned) {
 187         region_idx = 0;
 188     } else {
 189         ptrdiff_t offset = p - region.start_aligned;
 190
 191         if (offset > region.stride * (region.n - 1)) {
 192             region_idx = region.n - 1;
 193         } else {
 194             region_idx = offset / region.stride;
 195         }
 196     }
 197     return region_trees + region_idx * tree_size;
 198 }
 199
 200 void tcg_tb_insert(TranslationBlock *tb)
 201 {
 202     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 203
 204     g_assert(rt != NULL);
 205     qemu_mutex_lock(&rt->lock);
 206     q_tree_insert(rt->tree, &tb->tc, tb);
 207     qemu_mutex_unlock(&rt->lock);
 208 }
 209
 210 void tcg_tb_remove(TranslationBlock *tb)
 211 {
 212     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 213
 214     g_assert(rt != NULL);
 215     qemu_mutex_lock(&rt->lock);
 216     q_tree_remove(rt->tree, &tb->tc);
 217     qemu_mutex_unlock(&rt->lock);
 218 }
 219
 220 /*
 221  * Find the TB 'tb' such that
 222  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 223  * Return NULL if not found.
 224  */
 225 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
 226 {
 227     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
 228     TranslationBlock *tb;
 229     struct tb_tc s = { .ptr = (void *)tc_ptr };
 230
 231     if (rt == NULL) {
 232         return NULL;
 233     }
 234
 235     qemu_mutex_lock(&rt->lock);
 236     tb = q_tree_lookup(rt->tree, &s);
 237     qemu_mutex_unlock(&rt->lock);
 238     return tb;
 239 }
 240
 241 static void tcg_region_tree_lock_all(void)
 242 {
 243     size_t i;
 244
 245     for (i = 0; i < region.n; i++) {
 246         struct tcg_region_tree *rt = region_trees + i * tree_size;
 247
 248         qemu_mutex_lock(&rt->lock);
 249     }
 250 }
 251
 252 static void tcg_region_tree_unlock_all(void)
 253 {
 254     size_t i;
 255
 256     for (i = 0; i < region.n; i++) {
 257         struct tcg_region_tree *rt = region_trees + i * tree_size;
 258
 259         qemu_mutex_unlock(&rt->lock);
 260     }
 261 }
 262
 263 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
 264 {
 265     size_t i;
 266
 267     tcg_region_tree_lock_all();
 268     for (i = 0; i < region.n; i++) {
 269         struct tcg_region_tree *rt = region_trees + i * tree_size;
 270
 271         q_tree_foreach(rt->tree, func, user_data);
 272     }
 273     tcg_region_tree_unlock_all();
 274 }
 275
 276 size_t tcg_nb_tbs(void)
 277 {
 278     size_t nb_tbs = 0;
 279     size_t i;
 280
 281     tcg_region_tree_lock_all();
 282     for (i = 0; i < region.n; i++) {
 283         struct tcg_region_tree *rt = region_trees + i * tree_size;
 284
 285         nb_tbs += q_tree_nnodes(rt->tree);
 286     }
 287     tcg_region_tree_unlock_all();
 288     return nb_tbs;
 289 }
 290
 291 static void tcg_region_tree_reset_all(void)
 292 {
 293     size_t i;
 294
 295     tcg_region_tree_lock_all();
 296     for (i = 0; i < region.n; i++) {
 297         struct tcg_region_tree *rt = region_trees + i * tree_size;
 298
 299         /* Increment the refcount first so that destroy acts as a reset */
 300         q_tree_ref(rt->tree);
 301         q_tree_destroy(rt->tree);
 302     }
 303     tcg_region_tree_unlock_all();
 304 }
 305
 306 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
 307 {
 308     void *start, *end;
 309
 310     start = region.start_aligned + curr_region * region.stride;
 311     end = start + region.size;
 312
 313     if (curr_region == 0) {
 314         start = region.after_prologue;
 315     }
 316     /* The final region may have a few extra pages due to earlier rounding. */
 317     if (curr_region == region.n - 1) {
 318         end = region.start_aligned + region.total_size;
 319     }
 320
 321     *pstart = start;
 322     *pend = end;
 323 }
 324
 325 static void tcg_region_assign(TCGContext *s, size_t curr_region)
 326 {
 327     void *start, *end;
 328
 329     tcg_region_bounds(curr_region, &start, &end);
 330
 331     s->code_gen_buffer = start;
 332     s->code_gen_ptr = start;
 333     s->code_gen_buffer_size = end - start;
 334     s->code_gen_highwater = end - TCG_HIGHWATER;
 335 }
 336
 337 static bool tcg_region_alloc__locked(TCGContext *s)
 338 {
 339     if (region.current == region.n) {
 340         return true;
 341     }
 342     tcg_region_assign(s, region.current);
 343     region.current++;
 344     return false;
 345 }
 346
 347 /*
 348  * Request a new region once the one in use has filled up.
 349  * Returns true on error.
 350  */
 351 bool tcg_region_alloc(TCGContext *s)
 352 {
 353     bool err;
 354     /* read the region size now; alloc__locked will overwrite it on success */
 355     size_t size_full = s->code_gen_buffer_size;
 356
 357     qemu_mutex_lock(&region.lock);
 358     err = tcg_region_alloc__locked(s);
 359     if (!err) {
 360         region.agg_size_full += size_full - TCG_HIGHWATER;
 361     }
 362     qemu_mutex_unlock(&region.lock);
 363     return err;
 364 }
 365
 366 /*
 367  * Perform a context's first region allocation.
 368  * This function does _not_ increment region.agg_size_full.
 369  */
 370 static void tcg_region_initial_alloc__locked(TCGContext *s)
 371 {
 372     bool err = tcg_region_alloc__locked(s);
 373     g_assert(!err);
 374 }
 375
 376 void tcg_region_initial_alloc(TCGContext *s)
 377 {
 378     qemu_mutex_lock(&region.lock);
 379     tcg_region_initial_alloc__locked(s);
 380     qemu_mutex_unlock(&region.lock);
 381 }
 382
 383 /* Call from a safe-work context */
 384 void tcg_region_reset_all(void)
 385 {
 386     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
 387     unsigned int i;
 388
 389     qemu_mutex_lock(&region.lock);
 390     region.current = 0;
 391     region.agg_size_full = 0;
 392
 393     for (i = 0; i < n_ctxs; i++) {
 394         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
 395         tcg_region_initial_alloc__locked(s);
 396     }
 397     qemu_mutex_unlock(&region.lock);
 398
 399     tcg_region_tree_reset_all();
 400 }
 401
 402 static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
 403 {
 404 #ifdef CONFIG_USER_ONLY
 405     return 1;
 406 #else
 407     size_t n_regions;
 408
 409     /*
 410      * It is likely that some vCPUs will translate more code than others,
 411      * so we first try to set more regions than max_cpus, with those regions
 412      * being of reasonable size. If that's not possible we make do by evenly
 413      * dividing the code_gen_buffer among the vCPUs.
 414      */
 415     /* Use a single region if all we have is one vCPU thread */
 416     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
 417         return 1;
 418     }
 419
 420     /*
 421      * Try to have more regions than max_cpus, with each region being >= 2 MB.
 422      * If we can't, then just allocate one region per vCPU thread.
 423      */
 424     n_regions = tb_size / (2 * MiB);
 425     if (n_regions <= max_cpus) {
 426         return max_cpus;
 427     }
 428     return MIN(n_regions, max_cpus * 8);
 429 #endif
 430 }
 431
 432 /*
 433  * Minimum size of the code gen buffer.  This number is randomly chosen,
 434  * but not so small that we can't have a fair number of TB's live.
 435  *
 436  * Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
 437  * Unless otherwise indicated, this is constrained by the range of
 438  * direct branches on the host cpu, as used by the TCG implementation
 439  * of goto_tb.
 440  */
 441 #define MIN_CODE_GEN_BUFFER_SIZE     (1 * MiB)
 442
 443 #if TCG_TARGET_REG_BITS == 32
 444 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
 445 #ifdef CONFIG_USER_ONLY
 446 /*
 447  * For user mode on smaller 32 bit systems we may run into trouble
 448  * allocating big chunks of data in the right place. On these systems
 449  * we utilise a static code generation buffer directly in the binary.
 450  */
 451 #define USE_STATIC_CODE_GEN_BUFFER
 452 #endif
 453 #else /* TCG_TARGET_REG_BITS == 64 */
 454 #ifdef CONFIG_USER_ONLY
 455 /*
 456  * As user-mode emulation typically means running multiple instances
 457  * of the translator don't go too nuts with our default code gen
 458  * buffer lest we make things too hard for the OS.
 459  */
 460 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
 461 #else
 462 /*
 463  * We expect most system emulation to run one or two guests per host.
 464  * Users running large scale system emulation may want to tweak their
 465  * runtime setup via the tb-size control on the command line.
 466  */
 467 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
 468 #endif
 469 #endif
 470
 471 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
 472   (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
 473    ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
 474
 475 #ifdef USE_STATIC_CODE_GEN_BUFFER
 476 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
 477     __attribute__((aligned(CODE_GEN_ALIGN)));
 478
 479 static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
 480 {
 481     void *buf, *end;
 482     size_t size;
 483
 484     if (splitwx > 0) {
 485         error_setg(errp, "jit split-wx not supported");
 486         return -1;
 487     }
 488
 489     /* page-align the beginning and end of the buffer */
 490     buf = static_code_gen_buffer;
 491     end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
 492     buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size());
 493     end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size());
 494
 495     size = end - buf;
 496
 497     /* Honor a command-line option limiting the size of the buffer.  */
 498     if (size > tb_size) {
 499         size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size());
 500     }
 501
 502     region.start_aligned = buf;
 503     region.total_size = size;
 504
 505     return PROT_READ | PROT_WRITE;
 506 }
 507 #elif defined(_WIN32)
 508 /*
 509  * Local source-level compatibility with Unix.
 510  * Used by tcg_region_init below.
 511  */
 512 #define PROT_READ   1
 513 #define PROT_WRITE  2
 514 #define PROT_EXEC   4
 515
 516 static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
 517 {
 518     void *buf;
 519
 520     if (splitwx > 0) {
 521         error_setg(errp, "jit split-wx not supported");
 522         return -1;
 523     }
 524
 525     buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
 526                              PAGE_EXECUTE_READWRITE);
 527     if (buf == NULL) {
 528         error_setg_win32(errp, GetLastError(),
 529                          "allocate %zu bytes for jit buffer", size);
 530         return false;
 531     }
 532
 533     region.start_aligned = buf;
 534     region.total_size = size;
 535
 536     return PROT_READ | PROT_WRITE | PROT_EXEC;
 537 }
 538 #else
 539 static int alloc_code_gen_buffer_anon(size_t size, int prot,
 540                                       int flags, Error **errp)
 541 {
 542     void *buf;
 543
 544     buf = mmap(NULL, size, prot, flags, -1, 0);
 545     if (buf == MAP_FAILED) {
 546         error_setg_errno(errp, errno,
 547                          "allocate %zu bytes for jit buffer", size);
 548         return -1;
 549     }
 550
 551     region.start_aligned = buf;
 552     region.total_size = size;
 553     return prot;
 554 }
 555
 556 #ifndef CONFIG_TCG_INTERPRETER
 557 #ifdef CONFIG_POSIX
 558 #include "qemu/memfd.h"
 559
 560 static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
 561 {
 562     void *buf_rw = NULL, *buf_rx = MAP_FAILED;
 563     int fd = -1;
 564
 565     buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
 566     if (buf_rw == NULL) {
 567         goto fail;
 568     }
 569
 570     buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
 571     if (buf_rx == MAP_FAILED) {
 572         goto fail_rx;
 573     }
 574
 575     close(fd);
 576     region.start_aligned = buf_rw;
 577     region.total_size = size;
 578     tcg_splitwx_diff = buf_rx - buf_rw;
 579
 580     return PROT_READ | PROT_WRITE;
 581
 582  fail_rx:
 583     error_setg_errno(errp, errno, "failed to map shared memory for execute");
 584  fail:
 585     if (buf_rx != MAP_FAILED) {
 586         munmap(buf_rx, size);
 587     }
 588     if (buf_rw) {
 589         munmap(buf_rw, size);
 590     }
 591     if (fd >= 0) {
 592         close(fd);
 593     }
 594     return -1;
 595 }
 596 #endif /* CONFIG_POSIX */
 597
 598 #ifdef CONFIG_DARWIN
 599 #include <mach/mach.h>
 600
 601 extern kern_return_t mach_vm_remap(vm_map_t target_task,
 602                                    mach_vm_address_t *target_address,
 603                                    mach_vm_size_t size,
 604                                    mach_vm_offset_t mask,
 605                                    int flags,
 606                                    vm_map_t src_task,
 607                                    mach_vm_address_t src_address,
 608                                    boolean_t copy,
 609                                    vm_prot_t *cur_protection,
 610                                    vm_prot_t *max_protection,
 611                                    vm_inherit_t inheritance);
 612
 613 static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
 614 {
 615     kern_return_t ret;
 616     mach_vm_address_t buf_rw, buf_rx;
 617     vm_prot_t cur_prot, max_prot;
 618
 619     /* Map the read-write portion via normal anon memory. */
 620     if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
 621                                     MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
 622         return -1;
 623     }
 624
 625     buf_rw = (mach_vm_address_t)region.start_aligned;
 626     buf_rx = 0;
 627     ret = mach_vm_remap(mach_task_self(),
 628                         &buf_rx,
 629                         size,
 630                         0,
 631                         VM_FLAGS_ANYWHERE,
 632                         mach_task_self(),
 633                         buf_rw,
 634                         false,
 635                         &cur_prot,
 636                         &max_prot,
 637                         VM_INHERIT_NONE);
 638     if (ret != KERN_SUCCESS) {
 639         /* TODO: Convert "ret" to a human readable error message. */
 640         error_setg(errp, "vm_remap for jit splitwx failed");
 641         munmap((void *)buf_rw, size);
 642         return -1;
 643     }
 644
 645     if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
 646         error_setg_errno(errp, errno, "mprotect for jit splitwx");
 647         munmap((void *)buf_rx, size);
 648         munmap((void *)buf_rw, size);
 649         return -1;
 650     }
 651
 652     tcg_splitwx_diff = buf_rx - buf_rw;
 653     return PROT_READ | PROT_WRITE;
 654 }
 655 #endif /* CONFIG_DARWIN */
 656 #endif /* CONFIG_TCG_INTERPRETER */
 657
 658 static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
 659 {
 660 #ifndef CONFIG_TCG_INTERPRETER
 661 # ifdef CONFIG_DARWIN
 662     return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
 663 # endif
 664 # ifdef CONFIG_POSIX
 665     return alloc_code_gen_buffer_splitwx_memfd(size, errp);
 666 # endif
 667 #endif
 668     error_setg(errp, "jit split-wx not supported");
 669     return -1;
 670 }
 671
 672 static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
 673 {
 674     ERRP_GUARD();
 675     int prot, flags;
 676
 677     if (splitwx) {
 678         prot = alloc_code_gen_buffer_splitwx(size, errp);
 679         if (prot >= 0) {
 680             return prot;
 681         }
 682         /*
 683          * If splitwx force-on (1), fail;
 684          * if splitwx default-on (-1), fall through to splitwx off.
 685          */
 686         if (splitwx > 0) {
 687             return -1;
 688         }
 689         error_free_or_abort(errp);
 690     }
 691
 692     /*
 693      * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
 694      * rejects a permission change from RWX -> NONE when reserving the
 695      * guard pages later.  We can go the other way with the same number
 696      * of syscalls, so always begin with PROT_NONE.
 697      */
 698     prot = PROT_NONE;
 699     flags = MAP_PRIVATE | MAP_ANONYMOUS;
 700 #ifdef CONFIG_DARWIN
 701     /* Applicable to both iOS and macOS (Apple Silicon). */
 702     if (!splitwx) {
 703         flags |= MAP_JIT;
 704     }
 705 #endif
 706
 707     return alloc_code_gen_buffer_anon(size, prot, flags, errp);
 708 }
 709 #endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
 710
 711 /*
 712  * Initializes region partitioning.
 713  *
 714  * Called at init time from the parent thread (i.e. the one calling
 715  * tcg_context_init), after the target's TCG globals have been set.
 716  *
 717  * Region partitioning works by splitting code_gen_buffer into separate regions,
 718  * and then assigning regions to TCG threads so that the threads can translate
 719  * code in parallel without synchronization.
 720  *
 721  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 722  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 723  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 724  * must have been parsed before calling this function, since it calls
 725  * qemu_tcg_mttcg_enabled().
 726  *
 727  * In user-mode we use a single region.  Having multiple regions in user-mode
 728  * is not supported, because the number of vCPU threads (recall that each thread
 729  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 730  * OS, and usually this number is huge (tens of thousands is not uncommon).
 731  * Thus, given this large bound on the number of vCPU threads and the fact
 732  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 734  *
 735  * However, this user-mode limitation is unlikely to be a significant problem
 736  * in practice. Multi-threaded guests share most if not all of their translated
 737  * code, which makes parallel code generation less appealing than in softmmu.
 738  */
 739 void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
 740 {
 741     const size_t page_size = qemu_real_host_page_size();
 742     size_t region_size;
 743     int have_prot, need_prot;
 744
 745     /* Size the buffer.  */
 746     if (tb_size == 0) {
 747         size_t phys_mem = qemu_get_host_physmem();
 748         if (phys_mem == 0) {
 749             tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 750         } else {
 751             tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
 752             tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
 753         }
 754     }
 755     if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
 756         tb_size = MIN_CODE_GEN_BUFFER_SIZE;
 757     }
 758     if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
 759         tb_size = MAX_CODE_GEN_BUFFER_SIZE;
 760     }
 761
 762     have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
 763     assert(have_prot >= 0);
 764
 765     /* Request large pages for the buffer and the splitwx.  */
 766     qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
 767     if (tcg_splitwx_diff) {
 768         qemu_madvise(region.start_aligned + tcg_splitwx_diff,
 769                      region.total_size, QEMU_MADV_HUGEPAGE);
 770     }
 771
 772     /*
 773      * Make region_size a multiple of page_size, using aligned as the start.
 774      * As a result of this we might end up with a few extra pages at the end of
 775      * the buffer; we will assign those to the last region.
 776      */
 777     region.n = tcg_n_regions(tb_size, max_cpus);
 778     region_size = tb_size / region.n;
 779     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
 780
 781     /* A region must have at least 2 pages; one code, one guard */
 782     g_assert(region_size >= 2 * page_size);
 783     region.stride = region_size;
 784
 785     /* Reserve space for guard pages. */
 786     region.size = region_size - page_size;
 787     region.total_size -= page_size;
 788
 789     /*
 790      * The first region will be smaller than the others, via the prologue,
 791      * which has yet to be allocated.  For now, the first region begins at
 792      * the page boundary.
 793      */
 794     region.after_prologue = region.start_aligned;
 795
 796     /* init the region struct */
 797     qemu_mutex_init(&region.lock);
 798
 799     /*
 800      * Set guard pages in the rw buffer, as that's the one into which
 801      * buffer overruns could occur.  Do not set guard pages in the rx
 802      * buffer -- let that one use hugepages throughout.
 803      * Work with the page protections set up with the initial mapping.
 804      */
 805     need_prot = PROT_READ | PROT_WRITE;
 806 #ifndef CONFIG_TCG_INTERPRETER
 807     if (tcg_splitwx_diff == 0) {
 808         need_prot |= PROT_EXEC;
 809     }
 810 #endif
 811     for (size_t i = 0, n = region.n; i < n; i++) {
 812         void *start, *end;
 813
 814         tcg_region_bounds(i, &start, &end);
 815         if (have_prot != need_prot) {
 816             int rc;
 817
 818             if (need_prot == (PROT_READ | PROT_WRITE | PROT_EXEC)) {
 819                 rc = qemu_mprotect_rwx(start, end - start);
 820             } else if (need_prot == (PROT_READ | PROT_WRITE)) {
 821                 rc = qemu_mprotect_rw(start, end - start);
 822             } else {
 823                 g_assert_not_reached();
 824             }
 825             if (rc) {
 826                 error_setg_errno(&error_fatal, errno,
 827                                  "mprotect of jit buffer");
 828             }
 829         }
 830         if (have_prot != 0) {
 831             /* Guard pages are nice for bug detection but are not essential. */
 832             (void)qemu_mprotect_none(end, page_size);
 833         }
 834     }
 835
 836     tcg_region_trees_init();
 837
 838     /*
 839      * Leave the initial context initialized to the first region.
 840      * This will be the context into which we generate the prologue.
 841      * It is also the only context for CONFIG_USER_ONLY.
 842      */
 843     tcg_region_initial_alloc__locked(&tcg_init_ctx);
 844 }
 845
 846 void tcg_region_prologue_set(TCGContext *s)
 847 {
 848     /* Deduct the prologue from the first region.  */
 849     g_assert(region.start_aligned == s->code_gen_buffer);
 850     region.after_prologue = s->code_ptr;
 851
 852     /* Recompute boundaries of the first region. */
 853     tcg_region_assign(s, 0);
 854
 855     /* Register the balance of the buffer with gdb. */
 856     tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
 857                      region.start_aligned + region.total_size -
 858                      region.after_prologue);
 859 }
 860
 861 /*
 862  * Returns the size (in bytes) of all translated code (i.e. from all regions)
 863  * currently in the cache.
 864  * See also: tcg_code_capacity()
 865  * Do not confuse with tcg_current_code_size(); that one applies to a single
 866  * TCG context.
 867  */
 868 size_t tcg_code_size(void)
 869 {
 870     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
 871     unsigned int i;
 872     size_t total;
 873
 874     qemu_mutex_lock(&region.lock);
 875     total = region.agg_size_full;
 876     for (i = 0; i < n_ctxs; i++) {
 877         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
 878         size_t size;
 879
 880         size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
 881         g_assert(size <= s->code_gen_buffer_size);
 882         total += size;
 883     }
 884     qemu_mutex_unlock(&region.lock);
 885     return total;
 886 }
 887
 888 /*
 889  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 890  * regions.
 891  * See also: tcg_code_size()
 892  */
 893 size_t tcg_code_capacity(void)
 894 {
 895     size_t guard_size, capacity;
 896
 897     /* no need for synchronization; these variables are set at init time */
 898     guard_size = region.stride - region.size;
 899     capacity = region.total_size;
 900     capacity -= (region.n - 1) * guard_size;
 901     capacity -= region.n * TCG_HIGHWATER;
 902
 903     return capacity;
 904 }