/*
 * Memory region management for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/madvise.h"
#include "qemu/mprotect.h"
#include "qemu/memalign.h"
#include "qemu/cacheinfo.h"
#include "qemu/qtree.h"
#include "qapi/error.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg-internal.h"
struct tcg_region_tree {
    QemuMutex lock;
    QTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start_aligned;
    void *after_prologue;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */
    size_t total_size; /* size of entire buffer, >= n * stride */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;

/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
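
/*
 * Editorial note: the single unsigned comparison in in_code_gen_buffer()
 * below covers both bounds. If @p lies below start_aligned, the pointer
 * difference is negative and the cast to size_t wraps it to a value far
 * larger than total_size, so the test fails as required.
 */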
bool in_code_gen_buffer(const void *p)
{
    /*
     * Much like it is valid to have a pointer to the byte past the
     * end of an array (so long as you don't dereference it), allow
     * a pointer to the byte past the end of the code gen buffer.
     */
    return (size_t)(p - region.start_aligned) <= region.total_size;
}

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
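
/*
 * Each region keeps a QTree of TranslationBlocks keyed by the host-code
 * range [tb->tc.ptr, tb->tc.ptr + tb->tc.size); the helper below reports
 * whether a raw pointer falls below, inside, or above that half-open
 * interval.
 */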
/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}
static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have one of the two .size fields set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
static void tb_destroy(gpointer value)
{
    TranslationBlock *tb = value;
    qemu_spin_destroy(&tb->jmp_lock);
}
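
/*
 * tree_size below is sizeof(struct tcg_region_tree) rounded up to the
 * host dcache line size, so consecutive trees (each with its own lock)
 * never share a cache line; e.g. assuming 64-byte lines, a smaller
 * struct is padded out to a 64-byte slot. This avoids false sharing
 * between TCG threads touching adjacent trees concurrently.
 */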
static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = q_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
    }
}
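
/*
 * Map a host code pointer to the tree covering its region. Pointers
 * below start_aligned (i.e. in the prologue) map to region 0; offsets
 * beyond the start of the last stride clamp to the last region, which
 * also owns the extra pages left over by rounding in tcg_region_init().
 */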
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert. The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    q_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    q_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = q_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
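
/*
 * Note that the lookup key above is built with .size = 0; tb_tc_cmp()
 * recognizes this and falls back to the range comparison in
 * ptr_cmp_tb_tc(), so any pc inside a TB's generated code matches it.
 */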
static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        q_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += q_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        q_tree_ref(rt->tree);
        q_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}
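
/*
 * Layout of the buffer carved up by tcg_region_init() and reported by
 * tcg_region_bounds() below (G = one guard page):
 *
 *   |-prologue-|-region 0-|G|-region 1-|G| ... |-region n-1 (+extra)-|G|
 *   ^start_aligned         <-- stride -->
 *
 * Region 0 begins at after_prologue once the prologue has been emitted;
 * each region offers .size usable bytes followed by a guard page, and
 * the last region absorbs the pages left over from rounding.
 */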
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.after_prologue;
    }
    /* The final region may have a few extra pages due to earlier rounding. */
    if (curr_region == region.n - 1) {
        end = region.start_aligned + region.total_size;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}
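
/*
 * code_gen_highwater leaves TCG_HIGHWATER bytes of slack at the end of
 * the region: roughly speaking, the translator checks against the mark
 * only between TBs, so a single TB's code may spill past it and must
 * still fit; crossing the mark then triggers tcg_region_alloc() below.
 */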
static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static void tcg_region_initial_alloc__locked(TCGContext *s)
{
    bool err = tcg_region_alloc__locked(s);
    g_assert(!err);
}

void tcg_region_initial_alloc(TCGContext *s)
{
    qemu_mutex_lock(&region.lock);
    tcg_region_initial_alloc__locked(s);
    qemu_mutex_unlock(&region.lock);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        tcg_region_initial_alloc__locked(s);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
{
#ifdef CONFIG_USER_ONLY
    return 1;
#else
    size_t n_regions;

    /*
     * It is likely that some vCPUs will translate more code than others,
     * so we first try to set more regions than max_cpus, with those regions
     * being of reasonable size. If that's not possible we make do by evenly
     * dividing the code_gen_buffer among the vCPUs.
     */
    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /*
     * Try to have more regions than max_cpus, with each region being >= 2 MB.
     * If we can't, then just allocate one region per vCPU thread.
     */
    n_regions = tb_size / (2 * MiB);
    if (n_regions <= max_cpus) {
        return max_cpus;
    }
    return MIN(n_regions, max_cpus * 8);
#endif
}
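
/*
 * Worked example (softmmu, MTTCG): with tb_size = 1 GiB and
 * max_cpus = 8, n_regions = 1024 MiB / 2 MiB = 512 > 8, so the result
 * is MIN(512, 8 * 8) = 64 regions of 16 MiB each.
 */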
/*
 * Minimum size of the code gen buffer. This number is randomly chosen,
 * but not so small that we can't have a fair number of TB's live.
 *
 * Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
 * Unless otherwise indicated, this is constrained by the range of
 * direct branches on the host cpu, as used by the TCG implementation
 * of goto_tb.
 */
#define MIN_CODE_GEN_BUFFER_SIZE     (1 * MiB)
#if TCG_TARGET_REG_BITS == 32
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
#ifdef CONFIG_USER_ONLY
/*
 * For user mode on smaller 32 bit systems we may run into trouble
 * allocating big chunks of data in the right place. On these systems
 * we utilise a static code generation buffer directly in the binary.
 */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
#else /* TCG_TARGET_REG_BITS == 64 */
#ifdef CONFIG_USER_ONLY
/*
 * As user-mode emulation typically means running multiple instances
 * of the translator, don't go too nuts with our default code gen
 * buffer lest we make things too hard for the OS.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
#else
/*
 * We expect most system emulation to run one or two guests per host.
 * Users running large scale system emulation may want to tweak their
 * runtime setup via the tb-size control on the command line.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
#endif
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE \
  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
    __attribute__((aligned(CODE_GEN_ALIGN)));

static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
    void *buf, *end;
    size_t size;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    /* page-align the beginning and end of the buffer */
    buf = static_code_gen_buffer;
    end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size());
    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size());

    size = end - buf;

    /* Honor a command-line option limiting the size of the buffer. */
    if (size > tb_size) {
        size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size());
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PROT_READ | PROT_WRITE;
}
#elif defined(_WIN32)
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    void *buf;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                       PAGE_EXECUTE_READWRITE);
    if (buf == NULL) {
        error_setg_win32(errp, GetLastError(),
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
}
#else
static int alloc_code_gen_buffer_anon(size_t size, int prot,
                                      int flags, Error **errp)
{
    void *buf;

    buf = mmap(NULL, size, prot, flags, -1, 0);
    if (buf == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;
    return prot;
}

#ifndef CONFIG_TCG_INTERPRETER
#ifdef CONFIG_POSIX
#include "qemu/memfd.h"
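
/*
 * Split-wx via memfd: the same pages are mapped twice, once read-write
 * (where TCG emits code) and once read-execute (from where the host
 * runs it). tcg_splitwx_diff records the constant offset between the
 * two views, which tcg_splitwx_to_rx()/tcg_splitwx_to_rw() add or
 * subtract to convert pointers.
 */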
static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *buf_rw = NULL, *buf_rx = MAP_FAILED;
    int fd = -1;

    buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
    if (buf_rw == NULL) {
        goto fail;
    }

    buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    if (buf_rx == MAP_FAILED) {
        goto fail_rx;
    }

    close(fd);
    region.start_aligned = buf_rw;
    region.total_size = size;
    tcg_splitwx_diff = buf_rx - buf_rw;

    return PROT_READ | PROT_WRITE;

 fail_rx:
    error_setg_errno(errp, errno, "failed to map shared memory for execute");
 fail:
    if (buf_rx != MAP_FAILED) {
        munmap(buf_rx, size);
    }
    if (buf_rw) {
        munmap(buf_rw, size);
    }
    if (fd >= 0) {
        close(fd);
    }
    return -1;
}
#endif /* CONFIG_POSIX */

#ifdef CONFIG_DARWIN
#include <mach/mach.h>

extern kern_return_t mach_vm_remap(vm_map_t target_task,
                                   mach_vm_address_t *target_address,
                                   mach_vm_size_t size,
                                   mach_vm_offset_t mask,
                                   int flags,
                                   vm_map_t src_task,
                                   mach_vm_address_t src_address,
                                   boolean_t copy,
                                   vm_prot_t *cur_protection,
                                   vm_prot_t *max_protection,
                                   vm_inherit_t inheritance);
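
/*
 * Split-wx on Darwin: mach_vm_remap() creates a second mapping of the
 * anonymous rw buffer, and mprotect() then flips that alias to rx,
 * giving the same two-view arrangement as the memfd variant above.
 */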
static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
    kern_return_t ret;
    mach_vm_address_t buf_rw, buf_rx;
    vm_prot_t cur_prot, max_prot;

    /* Map the read-write portion via normal anon memory. */
    if (alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                   MAP_PRIVATE | MAP_ANONYMOUS, errp) < 0) {
        return -1;
    }

    buf_rw = (mach_vm_address_t)region.start_aligned;
    buf_rx = 0;
    ret = mach_vm_remap(mach_task_self(),
                        &buf_rx,
                        size,
                        0,
                        VM_FLAGS_ANYWHERE,
                        mach_task_self(),
                        buf_rw,
                        false,
                        &cur_prot,
                        &max_prot,
                        VM_INHERIT_NONE);
    if (ret != KERN_SUCCESS) {
        /* TODO: Convert "ret" to a human readable error message. */
        error_setg(errp, "vm_remap for jit splitwx failed");
        munmap((void *)buf_rw, size);
        return -1;
    }

    if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
        error_setg_errno(errp, errno, "mprotect for jit splitwx");
        munmap((void *)buf_rx, size);
        munmap((void *)buf_rw, size);
        return -1;
    }

    tcg_splitwx_diff = buf_rx - buf_rw;
    return PROT_READ | PROT_WRITE;
}
#endif /* CONFIG_DARWIN */
#endif /* CONFIG_TCG_INTERPRETER */

static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#ifndef CONFIG_TCG_INTERPRETER
# ifdef CONFIG_DARWIN
    return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
# endif
# ifdef CONFIG_POSIX
    return alloc_code_gen_buffer_splitwx_memfd(size, errp);
# endif
#endif
    error_setg(errp, "jit split-wx not supported");
    return -1;
}

static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    ERRP_GUARD();
    int prot, flags;

    if (splitwx) {
        prot = alloc_code_gen_buffer_splitwx(size, errp);
        if (prot >= 0) {
            return prot;
        }
        /*
         * If splitwx force-on (1), fail;
         * if splitwx default-on (-1), fall through to splitwx off.
         */
        if (splitwx > 0) {
            return -1;
        }
        error_free_or_abort(errp);
    }

    /*
     * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
     * rejects a permission change from RWX -> NONE when reserving the
     * guard pages later. We can go the other way with the same number
     * of syscalls, so always begin with PROT_NONE.
     */
    prot = PROT_NONE;
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef CONFIG_DARWIN
    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (!splitwx) {
        flags |= MAP_JIT;
    }
#endif

    return alloc_code_gen_buffer_anon(size, prot, flags, errp);
}
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    const size_t page_size = qemu_real_host_page_size();
    size_t region_size;
    int have_prot, need_prot;

    /* Size the buffer. */
    if (tb_size == 0) {
        size_t phys_mem = qemu_get_host_physmem();
        if (phys_mem == 0) {
            tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
        } else {
            tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
            tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
        }
    }
    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
    }
    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
    }

    have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
    assert(have_prot >= 0);

    /* Request large pages for the buffer and the splitwx. */
    qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
    if (tcg_splitwx_diff) {
        qemu_madvise(region.start_aligned + tcg_splitwx_diff,
                     region.total_size, QEMU_MADV_HUGEPAGE);
    }

    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region.n = tcg_n_regions(tb_size, max_cpus);
    region_size = tb_size / region.n;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);
    region.stride = region_size;

    /* Reserve space for guard pages. */
    region.size = region_size - page_size;
    region.total_size -= page_size;

    /*
     * The first region will be smaller than the others, via the prologue,
     * which has yet to be allocated. For now, the first region begins at
     * the page boundary.
     */
    region.after_prologue = region.start_aligned;

    /* init the region struct */
    qemu_mutex_init(&region.lock);

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur. Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     * Work with the page protections set up with the initial mapping.
     */
    need_prot = PAGE_READ | PAGE_WRITE;
#ifndef CONFIG_TCG_INTERPRETER
    if (tcg_splitwx_diff == 0) {
        need_prot |= PAGE_EXEC;
    }
#endif
    for (size_t i = 0, n = region.n; i < n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);
        if (have_prot != need_prot) {
            int rc;

            if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
                rc = qemu_mprotect_rwx(start, end - start);
            } else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
                rc = qemu_mprotect_rw(start, end - start);
            } else {
                g_assert_not_reached();
            }
            if (rc) {
                error_setg_errno(&error_fatal, errno,
                                 "mprotect of jit buffer");
            }
        }
        if (have_prot != 0) {
            /* Guard pages are nice for bug detection but are not essential. */
            (void)qemu_mprotect_none(end, page_size);
        }
    }

    tcg_region_trees_init();

    /*
     * Leave the initial context initialized to the first region.
     * This will be the context into which we generate the prologue.
     * It is also the only context for CONFIG_USER_ONLY.
     */
    tcg_region_initial_alloc__locked(&tcg_init_ctx);
}
void tcg_region_prologue_set(TCGContext *s)
{
    /* Deduct the prologue from the first region. */
    g_assert(region.start_aligned == s->code_gen_buffer);
    region.after_prologue = s->code_ptr;

    /* Recompute boundaries of the first region. */
    tcg_region_assign(s, 0);

    /* Register the balance of the buffer with gdb. */
    tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
                     region.start_aligned + region.total_size -
                     region.after_prologue);
}

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}
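
/*
 * Capacity accounting note: region.total_size already excludes the
 * final guard page (tcg_region_init subtracts it), so only the n - 1
 * interior guard pages are deducted below, along with the
 * TCG_HIGHWATER slack reserved at the end of each of the n regions.
 */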
/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.total_size;
    capacity -= (region.n - 1) * guard_size;
    capacity -= region.n * TCG_HIGHWATER;

    return capacity;
}