accel/tcg/cputlb.c

   1 /*
   2  *  Common CPU TLB handling
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "qemu/main-loop.h"
  22 #include "cpu.h"
  23 #include "exec/exec-all.h"
  24 #include "exec/memory.h"
  25 #include "exec/address-spaces.h"
  26 #include "exec/cpu_ldst.h"
  27 #include "exec/cputlb.h"
  28 #include "exec/memory-internal.h"
  29 #include "exec/ram_addr.h"
  30 #include "tcg/tcg.h"
  31 #include "qemu/error-report.h"
  32 #include "exec/log.h"
  33 #include "exec/helper-proto.h"
  34 #include "qemu/atomic.h"
  35 #include "qemu/atomic128.h"
  36 #include "translate-all.h"
  37 #include "trace/trace-root.h"
  38 #include "trace/mem.h"
  39 #ifdef CONFIG_PLUGIN
  40 #include "qemu/plugin-memory.h"
  41 #endif
  42
  43 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
  44 /* #define DEBUG_TLB */
  45 /* #define DEBUG_TLB_LOG */
  46
  47 #ifdef DEBUG_TLB
  48 # define DEBUG_TLB_GATE 1
  49 # ifdef DEBUG_TLB_LOG
  50 #  define DEBUG_TLB_LOG_GATE 1
  51 # else
  52 #  define DEBUG_TLB_LOG_GATE 0
  53 # endif
  54 #else
  55 # define DEBUG_TLB_GATE 0
  56 # define DEBUG_TLB_LOG_GATE 0
  57 #endif
  58
  59 #define tlb_debug(fmt, ...) do { \
  60     if (DEBUG_TLB_LOG_GATE) { \
  61         qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
  62                       ## __VA_ARGS__); \
  63     } else if (DEBUG_TLB_GATE) { \
  64         fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
  65     } \
  66 } while (0)
  67
/*
 * assert_cpu_is_self: debug-only check (active when DEBUG_TLB_GATE is
 * non-zero) that @cpu is either not yet created or is the CPU the current
 * thread is running as.  Note that @cpu is evaluated more than once.
 */
#define assert_cpu_is_self(cpu) do {                              \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
        }                                                         \
    } while (0)
  73
/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds.  The build fails if a target_ulong
 * is larger than the whole run_on_cpu_data object. */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask
 * (the idxmap arguments in this file are uint16_t).
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
/* Bitmask with one bit set for each of the NB_MMU_MODES mmu indexes. */
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
  82
  83 static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
  84 {
  85     return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
  86 }
  87
  88 static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
  89 {
  90     return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
  91 }
  92
  93 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
  94                              size_t max_entries)
  95 {
  96     desc->window_begin_ns = ns;
  97     desc->window_max_entries = max_entries;
  98 }
  99
 100 /**
 101  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 102  * @desc: The CPUTLBDesc portion of the TLB
 103  * @fast: The CPUTLBDescFast portion of the same TLB
 104  *
 105  * Called with tlb_lock_held.
 106  *
 107  * We have two main constraints when resizing a TLB: (1) we only resize it
 108  * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 109  * the array or unnecessarily flushing it), which means we do not control how
 110  * frequently the resizing can occur; (2) we don't have access to the guest's
 111  * future scheduling decisions, and therefore have to decide the magnitude of
 112  * the resize based on past observations.
 113  *
 114  * In general, a memory-hungry process can benefit greatly from an appropriately
 115  * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 116  * we just have to make the TLB as large as possible; while an oversized TLB
 117  * results in minimal TLB miss rates, it also takes longer to be flushed
 118  * (flushes can be _very_ frequent), and the reduced locality can also hurt
 119  * performance.
 120  *
 121  * To achieve near-optimal performance for all kinds of workloads, we:
 122  *
 123  * 1. Aggressively increase the size of the TLB when the use rate of the
 124  * TLB being flushed is high, since it is likely that in the near future this
 125  * memory-hungry process will execute again, and its memory hungriness will
 126  * probably be similar.
 127  *
 128  * 2. Slowly reduce the size of the TLB as the use rate declines over a
 129  * reasonably large time window. The rationale is that if in such a time window
 130  * we have not observed a high TLB use rate, it is likely that we won't observe
 131  * it in the near future. In that case, once a time window expires we downsize
 132  * the TLB to match the maximum use rate observed in the window.
 133  *
 134  * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 135  * since in that range performance is likely near-optimal. Recall that the TLB
 136  * is direct mapped, so we want the use rate to be low (or at least not too
 137  * high), since otherwise we are likely to have a significant amount of
 138  * conflict misses.
 139  */
 140 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
 141                                   int64_t now)
 142 {
 143     size_t old_size = tlb_n_entries(fast);
 144     size_t rate;
 145     size_t new_size = old_size;
 146     int64_t window_len_ms = 100;
 147     int64_t window_len_ns = window_len_ms * 1000 * 1000;
 148     bool window_expired = now > desc->window_begin_ns + window_len_ns;
 149
 150     if (desc->n_used_entries > desc->window_max_entries) {
 151         desc->window_max_entries = desc->n_used_entries;
 152     }
 153     rate = desc->window_max_entries * 100 / old_size;
 154
 155     if (rate > 70) {
 156         new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
 157     } else if (rate < 30 && window_expired) {
 158         size_t ceil = pow2ceil(desc->window_max_entries);
 159         size_t expected_rate = desc->window_max_entries * 100 / ceil;
 160
 161         /*
 162          * Avoid undersizing when the max number of entries seen is just below
 163          * a pow2. For instance, if max_entries == 1025, the expected use rate
 164          * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
 165          * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
 166          * later. Thus, make sure that the expected use rate remains below 70%.
 167          * (and since we double the size, that means the lowest rate we'd
 168          * expect to get is 35%, which is still in the 30-70% range where
 169          * we consider that the size is appropriate.)
 170          */
 171         if (expected_rate > 70) {
 172             ceil *= 2;
 173         }
 174         new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
 175     }
 176
 177     if (new_size == old_size) {
 178         if (window_expired) {
 179             tlb_window_reset(desc, now, desc->n_used_entries);
 180         }
 181         return;
 182     }
 183
 184     g_free(fast->table);
 185     g_free(desc->iotlb);
 186
 187     tlb_window_reset(desc, now, 0);
 188     /* desc->n_used_entries is cleared by the caller */
 189     fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
 190     fast->table = g_try_new(CPUTLBEntry, new_size);
 191     desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
 192
 193     /*
 194      * If the allocations fail, try smaller sizes. We just freed some
 195      * memory, so going back to half of new_size has a good chance of working.
 196      * Increased memory pressure elsewhere in the system might cause the
 197      * allocations to fail though, so we progressively reduce the allocation
 198      * size, aborting if we cannot even allocate the smallest TLB we support.
 199      */
 200     while (fast->table == NULL || desc->iotlb == NULL) {
 201         if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
 202             error_report("%s: %s", __func__, strerror(errno));
 203             abort();
 204         }
 205         new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
 206         fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
 207
 208         g_free(fast->table);
 209         g_free(desc->iotlb);
 210         fast->table = g_try_new(CPUTLBEntry, new_size);
 211         desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
 212     }
 213 }
 214
 215 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
 216 {
 217     desc->n_used_entries = 0;
 218     desc->large_page_addr = -1;
 219     desc->large_page_mask = -1;
 220     desc->vindex = 0;
 221     memset(fast->table, -1, sizeof_tlb(fast));
 222     memset(desc->vtable, -1, sizeof(desc->vtable));
 223 }
 224
 225 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
 226                                         int64_t now)
 227 {
 228     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
 229     CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
 230
 231     tlb_mmu_resize_locked(desc, fast, now);
 232     tlb_mmu_flush_locked(desc, fast);
 233 }
 234
 235 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
 236 {
 237     size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
 238
 239     tlb_window_reset(desc, now, 0);
 240     desc->n_used_entries = 0;
 241     fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
 242     fast->table = g_new(CPUTLBEntry, n_entries);
 243     desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
 244     tlb_mmu_flush_locked(desc, fast);
 245 }
 246
 247 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
 248 {
 249     env_tlb(env)->d[mmu_idx].n_used_entries++;
 250 }
 251
 252 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
 253 {
 254     env_tlb(env)->d[mmu_idx].n_used_entries--;
 255 }
 256
 257 void tlb_init(CPUState *cpu)
 258 {
 259     CPUArchState *env = cpu->env_ptr;
 260     int64_t now = get_clock_realtime();
 261     int i;
 262
 263     qemu_spin_init(&env_tlb(env)->c.lock);
 264
 265     /* All tlbs are initialized flushed. */
 266     env_tlb(env)->c.dirty = 0;
 267
 268     for (i = 0; i < NB_MMU_MODES; i++) {
 269         tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
 270     }
 271 }
 272
 273 void tlb_destroy(CPUState *cpu)
 274 {
 275     CPUArchState *env = cpu->env_ptr;
 276     int i;
 277
 278     qemu_spin_destroy(&env_tlb(env)->c.lock);
 279     for (i = 0; i < NB_MMU_MODES; i++) {
 280         CPUTLBDesc *desc = &env_tlb(env)->d[i];
 281         CPUTLBDescFast *fast = &env_tlb(env)->f[i];
 282
 283         g_free(fast->table);
 284         g_free(desc->iotlb);
 285     }
 286 }
 287
 288 /* flush_all_helper: run fn across all cpus
 289  *
 290  * If the wait flag is set then the src cpu's helper will be queued as
 291  * "safe" work and the loop exited creating a synchronisation point
 292  * where all queued work will be finished before execution starts
 293  * again.
 294  */
 295 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
 296                              run_on_cpu_data d)
 297 {
 298     CPUState *cpu;
 299
 300     CPU_FOREACH(cpu) {
 301         if (cpu != src) {
 302             async_run_on_cpu(cpu, fn, d);
 303         }
 304     }
 305 }
 306
 307 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
 308 {
 309     CPUState *cpu;
 310     size_t full = 0, part = 0, elide = 0;
 311
 312     CPU_FOREACH(cpu) {
 313         CPUArchState *env = cpu->env_ptr;
 314
 315         full += qatomic_read(&env_tlb(env)->c.full_flush_count);
 316         part += qatomic_read(&env_tlb(env)->c.part_flush_count);
 317         elide += qatomic_read(&env_tlb(env)->c.elide_flush_count);
 318     }
 319     *pfull = full;
 320     *ppart = part;
 321     *pelide = elide;
 322 }
 323
 324 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 325 {
 326     CPUArchState *env = cpu->env_ptr;
 327     uint16_t asked = data.host_int;
 328     uint16_t all_dirty, work, to_clean;
 329     int64_t now = get_clock_realtime();
 330
 331     assert_cpu_is_self(cpu);
 332
 333     tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
 334
 335     qemu_spin_lock(&env_tlb(env)->c.lock);
 336
 337     all_dirty = env_tlb(env)->c.dirty;
 338     to_clean = asked & all_dirty;
 339     all_dirty &= ~to_clean;
 340     env_tlb(env)->c.dirty = all_dirty;
 341
 342     for (work = to_clean; work != 0; work &= work - 1) {
 343         int mmu_idx = ctz32(work);
 344         tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
 345     }
 346
 347     qemu_spin_unlock(&env_tlb(env)->c.lock);
 348
 349     cpu_tb_jmp_cache_clear(cpu);
 350
 351     if (to_clean == ALL_MMUIDX_BITS) {
 352         qatomic_set(&env_tlb(env)->c.full_flush_count,
 353                    env_tlb(env)->c.full_flush_count + 1);
 354     } else {
 355         qatomic_set(&env_tlb(env)->c.part_flush_count,
 356                    env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
 357         if (to_clean != asked) {
 358             qatomic_set(&env_tlb(env)->c.elide_flush_count,
 359                        env_tlb(env)->c.elide_flush_count +
 360                        ctpop16(asked & ~to_clean));
 361         }
 362     }
 363 }
 364
 365 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
 366 {
 367     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
 368
 369     if (cpu->created && !qemu_cpu_is_self(cpu)) {
 370         async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
 371                          RUN_ON_CPU_HOST_INT(idxmap));
 372     } else {
 373         tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
 374     }
 375 }
 376
 377 void tlb_flush(CPUState *cpu)
 378 {
 379     tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
 380 }
 381
 382 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
 383 {
 384     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
 385
 386     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
 387
 388     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
 389     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
 390 }
 391
 392 void tlb_flush_all_cpus(CPUState *src_cpu)
 393 {
 394     tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
 395 }
 396
 397 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
 398 {
 399     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
 400
 401     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
 402
 403     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
 404     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
 405 }
 406
 407 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
 408 {
 409     tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
 410 }
 411
 412 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
 413                                         target_ulong page)
 414 {
 415     return tlb_hit_page(tlb_entry->addr_read, page) ||
 416            tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
 417            tlb_hit_page(tlb_entry->addr_code, page);
 418 }
 419
 420 /**
 421  * tlb_entry_is_empty - return true if the entry is not in use
 422  * @te: pointer to CPUTLBEntry
 423  */
 424 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
 425 {
 426     return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
 427 }
 428
 429 /* Called with tlb_c.lock held */
 430 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
 431                                           target_ulong page)
 432 {
 433     if (tlb_hit_page_anyprot(tlb_entry, page)) {
 434         memset(tlb_entry, -1, sizeof(*tlb_entry));
 435         return true;
 436     }
 437     return false;
 438 }
 439
 440 /* Called with tlb_c.lock held */
 441 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
 442                                               target_ulong page)
 443 {
 444     CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
 445     int k;
 446
 447     assert_cpu_is_self(env_cpu(env));
 448     for (k = 0; k < CPU_VTLB_SIZE; k++) {
 449         if (tlb_flush_entry_locked(&d->vtable[k], page)) {
 450             tlb_n_used_entries_dec(env, mmu_idx);
 451         }
 452     }
 453 }
 454
 455 static void tlb_flush_page_locked(CPUArchState *env, int midx,
 456                                   target_ulong page)
 457 {
 458     target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
 459     target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
 460
 461     /* Check if we need to flush due to large pages.  */
 462     if ((page & lp_mask) == lp_addr) {
 463         tlb_debug("forcing full flush midx %d ("
 464                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
 465                   midx, lp_addr, lp_mask);
 466         tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
 467     } else {
 468         if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
 469             tlb_n_used_entries_dec(env, midx);
 470         }
 471         tlb_flush_vtlb_page_locked(env, midx, page);
 472     }
 473 }
 474
 475 /**
 476  * tlb_flush_page_by_mmuidx_async_0:
 477  * @cpu: cpu on which to flush
 478  * @addr: page of virtual address to flush
 479  * @idxmap: set of mmu_idx to flush
 480  *
 481  * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
 482  * at @addr from the tlbs indicated by @idxmap from @cpu.
 483  */
 484 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
 485                                              target_ulong addr,
 486                                              uint16_t idxmap)
 487 {
 488     CPUArchState *env = cpu->env_ptr;
 489     int mmu_idx;
 490
 491     assert_cpu_is_self(cpu);
 492
 493     tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
 494
 495     qemu_spin_lock(&env_tlb(env)->c.lock);
 496     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 497         if ((idxmap >> mmu_idx) & 1) {
 498             tlb_flush_page_locked(env, mmu_idx, addr);
 499         }
 500     }
 501     qemu_spin_unlock(&env_tlb(env)->c.lock);
 502
 503     tb_flush_jmp_cache(cpu, addr);
 504 }
 505
 506 /**
 507  * tlb_flush_page_by_mmuidx_async_1:
 508  * @cpu: cpu on which to flush
 509  * @data: encoded addr + idxmap
 510  *
 511  * Helper for tlb_flush_page_by_mmuidx and friends, called through
 512  * async_run_on_cpu.  The idxmap parameter is encoded in the page
 513  * offset of the target_ptr field.  This limits the set of mmu_idx
 514  * that can be passed via this method.
 515  */
 516 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
 517                                              run_on_cpu_data data)
 518 {
 519     target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
 520     target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
 521     uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
 522
 523     tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
 524 }
 525
/*
 * Heap-allocated argument block for tlb_flush_page_by_mmuidx_async_2,
 * used when the idxmap cannot be packed into the page-offset bits of
 * the address.
 */
typedef struct {
    target_ulong addr;   /* page address to flush */
    uint16_t idxmap;     /* bitmap of mmu indexes to flush */
} TLBFlushPageByMMUIdxData;
 530
 531 /**
 532  * tlb_flush_page_by_mmuidx_async_2:
 533  * @cpu: cpu on which to flush
 534  * @data: allocated addr + idxmap
 535  *
 536  * Helper for tlb_flush_page_by_mmuidx and friends, called through
 537  * async_run_on_cpu.  The addr+idxmap parameters are stored in a
 538  * TLBFlushPageByMMUIdxData structure that has been allocated
 539  * specifically for this helper.  Free the structure when done.
 540  */
 541 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
 542                                              run_on_cpu_data data)
 543 {
 544     TLBFlushPageByMMUIdxData *d = data.host_ptr;
 545
 546     tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
 547     g_free(d);
 548 }
 549
 550 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
 551 {
 552     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
 553
 554     /* This should already be page aligned */
 555     addr &= TARGET_PAGE_MASK;
 556
 557     if (qemu_cpu_is_self(cpu)) {
 558         tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
 559     } else if (idxmap < TARGET_PAGE_SIZE) {
 560         /*
 561          * Most targets have only a few mmu_idx.  In the case where
 562          * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
 563          * allocating memory for this operation.
 564          */
 565         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
 566                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
 567     } else {
 568         TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
 569
 570         /* Otherwise allocate a structure, freed by the worker.  */
 571         d->addr = addr;
 572         d->idxmap = idxmap;
 573         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
 574                          RUN_ON_CPU_HOST_PTR(d));
 575     }
 576 }
 577
 578 void tlb_flush_page(CPUState *cpu, target_ulong addr)
 579 {
 580     tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
 581 }
 582
 583 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
 584                                        uint16_t idxmap)
 585 {
 586     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
 587
 588     /* This should already be page aligned */
 589     addr &= TARGET_PAGE_MASK;
 590
 591     /*
 592      * Allocate memory to hold addr+idxmap only when needed.
 593      * See tlb_flush_page_by_mmuidx for details.
 594      */
 595     if (idxmap < TARGET_PAGE_SIZE) {
 596         flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
 597                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
 598     } else {
 599         CPUState *dst_cpu;
 600
 601         /* Allocate a separate data block for each destination cpu.  */
 602         CPU_FOREACH(dst_cpu) {
 603             if (dst_cpu != src_cpu) {
 604                 TLBFlushPageByMMUIdxData *d
 605                     = g_new(TLBFlushPageByMMUIdxData, 1);
 606
 607                 d->addr = addr;
 608                 d->idxmap = idxmap;
 609                 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
 610                                  RUN_ON_CPU_HOST_PTR(d));
 611             }
 612         }
 613     }
 614
 615     tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
 616 }
 617
 618 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
 619 {
 620     tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
 621 }
 622
 623 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
 624                                               target_ulong addr,
 625                                               uint16_t idxmap)
 626 {
 627     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
 628
 629     /* This should already be page aligned */
 630     addr &= TARGET_PAGE_MASK;
 631
 632     /*
 633      * Allocate memory to hold addr+idxmap only when needed.
 634      * See tlb_flush_page_by_mmuidx for details.
 635      */
 636     if (idxmap < TARGET_PAGE_SIZE) {
 637         flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
 638                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
 639         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
 640                               RUN_ON_CPU_TARGET_PTR(addr | idxmap));
 641     } else {
 642         CPUState *dst_cpu;
 643         TLBFlushPageByMMUIdxData *d;
 644
 645         /* Allocate a separate data block for each destination cpu.  */
 646         CPU_FOREACH(dst_cpu) {
 647             if (dst_cpu != src_cpu) {
 648                 d = g_new(TLBFlushPageByMMUIdxData, 1);
 649                 d->addr = addr;
 650                 d->idxmap = idxmap;
 651                 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
 652                                  RUN_ON_CPU_HOST_PTR(d));
 653             }
 654         }
 655
 656         d = g_new(TLBFlushPageByMMUIdxData, 1);
 657         d->addr = addr;
 658         d->idxmap = idxmap;
 659         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
 660                               RUN_ON_CPU_HOST_PTR(d));
 661     }
 662 }
 663
 664 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
 665 {
 666     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
 667 }
 668
 669 /* update the TLBs so that writes to code in the virtual page 'addr'
 670    can be detected */
 671 void tlb_protect_code(ram_addr_t ram_addr)
 672 {
 673     cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
 674                                              DIRTY_MEMORY_CODE);
 675 }
 676
 677 /* update the TLB so that writes in physical page 'phys_addr' are no longer
 678    tested for self modifying code */
 679 void tlb_unprotect_code(ram_addr_t ram_addr)
 680 {
 681     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
 682 }
 683
 684
 685 /*
 686  * Dirty write flag handling
 687  *
 688  * When the TCG code writes to a location it looks up the address in
 689  * the TLB and uses that data to compute the final address. If any of
 690  * the lower bits of the address are set then the slow path is forced.
 691  * There are a number of reasons to do this but for normal RAM the
 692  * most usual is detecting writes to code regions which may invalidate
 693  * generated code.
 694  *
 695  * Other vCPUs might be reading their TLBs during guest execution, so we update
 696  * te->addr_write with qatomic_set. We don't need to worry about this for
 697  * oversized guests as MTTCG is disabled for them.
 698  *
 699  * Called with tlb_c.lock held.
 700  */
 701 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
 702                                          uintptr_t start, uintptr_t length)
 703 {
 704     uintptr_t addr = tlb_entry->addr_write;
 705
 706     if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
 707                  TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
 708         addr &= TARGET_PAGE_MASK;
 709         addr += tlb_entry->addend;
 710         if ((addr - start) < length) {
 711 #if TCG_OVERSIZED_GUEST
 712             tlb_entry->addr_write |= TLB_NOTDIRTY;
 713 #else
 714             qatomic_set(&tlb_entry->addr_write,
 715                        tlb_entry->addr_write | TLB_NOTDIRTY);
 716 #endif
 717         }
 718     }
 719 }
 720
/*
 * Copy the whole TLB entry @s into @d with a plain structure assignment.
 *
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}
 729
 730 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 731  * the target vCPU).
 732  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 733  * thing actually updated is the target TLB entry ->addr_write flags.
 734  */
 735 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
 736 {
 737     CPUArchState *env;
 738
 739     int mmu_idx;
 740
 741     env = cpu->env_ptr;
 742     qemu_spin_lock(&env_tlb(env)->c.lock);
 743     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 744         unsigned int i;
 745         unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
 746
 747         for (i = 0; i < n; i++) {
 748             tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
 749                                          start1, length);
 750         }
 751
 752         for (i = 0; i < CPU_VTLB_SIZE; i++) {
 753             tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
 754                                          start1, length);
 755         }
 756     }
 757     qemu_spin_unlock(&env_tlb(env)->c.lock);
 758 }
 759
 760 /* Called with tlb_c.lock held */
 761 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
 762                                          target_ulong vaddr)
 763 {
 764     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
 765         tlb_entry->addr_write = vaddr;
 766     }
 767 }
 768
 769 /* update the TLB corresponding to virtual page vaddr
 770    so that it is no longer dirty */
 771 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
 772 {
 773     CPUArchState *env = cpu->env_ptr;
 774     int mmu_idx;
 775
 776     assert_cpu_is_self(cpu);
 777
 778     vaddr &= TARGET_PAGE_MASK;
 779     qemu_spin_lock(&env_tlb(env)->c.lock);
 780     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 781         tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
 782     }
 783
 784     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 785         int k;
 786         for (k = 0; k < CPU_VTLB_SIZE; k++) {
 787             tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
 788         }
 789     }
 790     qemu_spin_unlock(&env_tlb(env)->c.lock);
 791 }
 792
 793 /* Our TLB does not support large pages, so remember the area covered by
 794    large pages and trigger a full TLB flush if these are invalidated.  */
 795 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
 796                                target_ulong vaddr, target_ulong size)
 797 {
 798     target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
 799     target_ulong lp_mask = ~(size - 1);
 800
 801     if (lp_addr == (target_ulong)-1) {
 802         /* No previous large page.  */
 803         lp_addr = vaddr;
 804     } else {
 805         /* Extend the existing region to include the new page.
 806            This is a compromise between unnecessary flushes and
 807            the cost of maintaining a full variable size TLB.  */
 808         lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
 809         while (((lp_addr ^ vaddr) & lp_mask) != 0) {
 810             lp_mask <<= 1;
 811         }
 812     }
 813     env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
 814     env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
 815 }
 816
/*
 * Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * @cpu:     vCPU whose TLB is filled; assert_cpu_is_self() is applied to it
 * @vaddr:   guest virtual address; masked with TARGET_PAGE_MASK before use
 * @paddr:   guest physical address; masked with TARGET_PAGE_MASK before use
 * @attrs:   memory transaction attributes, stored in the iotlb entry and
 *           used to select the address space index
 * @prot:    PAGE_READ / PAGE_WRITE / PAGE_EXEC / PAGE_WRITE_INV bits; may be
 *           modified by address_space_translate_for_iotlb()
 * @mmu_idx: MMU index selecting which TLB is filled
 * @size:    size of the mapping.  Values larger than TARGET_PAGE_SIZE are
 *           recorded with tlb_add_large_page(); values smaller than
 *           TARGET_PAGE_SIZE mark the entry TLB_INVALID_MASK so that it
 *           never hits and every access refills.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLB *tlb = env_tlb(env);
    CPUTLBDesc *desc = &tlb->d[mmu_idx];
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong write_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    int wp_flags;
    bool is_ram, is_romd;

    assert_cpu_is_self(cpu);

    /* sz is the length handed to the address-space translation below.  */
    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    /* The translation may update xlat, sz and prot.  */
    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    /*
     * "address" becomes the read/code comparator and "write_address" the
     * write comparator; flag bits are ORed into the page address.
     */
    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /* Repeat the MMU check and TLB fill on every access.  */
        address |= TLB_INVALID_MASK;
    }
    if (attrs.byte_swap) {
        address |= TLB_BSWAP;
    }

    is_ram = memory_region_is_ram(section->mr);
    is_romd = memory_region_is_romd(section->mr);

    if (is_ram || is_romd) {
        /* RAM and ROMD both have associated host memory. */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    } else {
        /* I/O does not; force the host address to NULL. */
        addend = 0;
    }

    write_address = address;
    if (is_ram) {
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        /*
         * Computing is_clean is expensive; avoid all that unless
         * the page is actually writable.
         */
        if (prot & PAGE_WRITE) {
            if (section->readonly) {
                write_address |= TLB_DISCARD_WRITE;
            } else if (cpu_physical_memory_is_clean(iotlb)) {
                write_address |= TLB_NOTDIRTY;
            }
        }
    } else {
        /* I/O or ROMD */
        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
        /*
         * Writes to romd devices must go through MMIO to enable write.
         * Reads to romd devices go through the ram_ptr found above,
         * but of course reads to I/O must go through MMIO.
         */
        write_address |= TLB_MMIO;
        if (!is_romd) {
            address = write_address;
        }
    }

    /* Watchpoint flags are computed for the whole target page.  */
    wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
                                              TARGET_PAGE_SIZE);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&tlb->c.lock);

    /* Note that the tlb is no longer clean.  */
    tlb->c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page.  */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
        /* Victim slots are reused round-robin via vindex.  */
        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &desc->vtable[vidx];

        /* Evict the old entry into the victim tlb.  */
        copy_tlb_helper_locked(tv, te);
        desc->viotlb[vidx] = desc->iotlb[index];
        /* Balanced by the unconditional increment at the end.  */
        tlb_n_used_entries_dec(env, mmu_idx);
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (RAM)
     *  + the offset within section->mr of the page base (I/O, ROMD)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    desc->iotlb[index].addr = iotlb - vaddr_page;
    desc->iotlb[index].attrs = attrs;

    /* Now calculate the new entry */
    /* addend is biased so that (guest vaddr + addend) is the host address.  */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
        if (wp_flags & BP_MEM_READ) {
            tn.addr_read |= TLB_WATCHPOINT;
        }
    } else {
        /* -1 is the "no access of this kind" comparator value.  */
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        tn.addr_write = write_address;
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
        if (wp_flags & BP_MEM_WRITE) {
            tn.addr_write |= TLB_WATCHPOINT;
        }
    }

    /* Publish the fully built entry while still holding the lock.  */
    copy_tlb_helper_locked(te, &tn);
    tlb_n_used_entries_inc(env, mmu_idx);
    qemu_spin_unlock(&tlb->c.lock);
}
 993
 994 /* Add a new TLB entry, but without specifying the memory
 995  * transaction attributes to be used.
 996  */
 997 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
 998                   hwaddr paddr, int prot,
 999                   int mmu_idx, target_ulong size)
1000 {
1001     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
1002                             prot, mmu_idx, size);
1003 }
1004
1005 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1006 {
1007     ram_addr_t ram_addr;
1008
1009     ram_addr = qemu_ram_addr_from_host(ptr);
1010     if (ram_addr == RAM_ADDR_INVALID) {
1011         error_report("Bad ram pointer %p", ptr);
1012         abort();
1013     }
1014     return ram_addr;
1015 }
1016
1017 /*
1018  * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
1019  * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
1020  * be discarded and looked up again (e.g. via tlb_entry()).
1021  */
1022 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
1023                      MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1024 {
1025     CPUClass *cc = CPU_GET_CLASS(cpu);
1026     bool ok;
1027
1028     /*
1029      * This is not a probe, so only valid return is success; failure
1030      * should result in exception + longjmp to the cpu loop.
1031      */
1032     ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
1033     assert(ok);
1034 }
1035
1036 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1037                          int mmu_idx, target_ulong addr, uintptr_t retaddr,
1038                          MMUAccessType access_type, MemOp op)
1039 {
1040     CPUState *cpu = env_cpu(env);
1041     hwaddr mr_offset;
1042     MemoryRegionSection *section;
1043     MemoryRegion *mr;
1044     uint64_t val;
1045     bool locked = false;
1046     MemTxResult r;
1047
1048     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1049     mr = section->mr;
1050     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1051     cpu->mem_io_pc = retaddr;
1052     if (!cpu->can_do_io) {
1053         cpu_io_recompile(cpu, retaddr);
1054     }
1055
1056     if (!qemu_mutex_iothread_locked()) {
1057         qemu_mutex_lock_iothread();
1058         locked = true;
1059     }
1060     r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
1061     if (r != MEMTX_OK) {
1062         hwaddr physaddr = mr_offset +
1063             section->offset_within_address_space -
1064             section->offset_within_region;
1065
1066         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
1067                                mmu_idx, iotlbentry->attrs, r, retaddr);
1068     }
1069     if (locked) {
1070         qemu_mutex_unlock_iothread();
1071     }
1072
1073     return val;
1074 }
1075
1076 /*
1077  * Save a potentially trashed IOTLB entry for later lookup by plugin.
1078  * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
1079  * because of the side effect of io_writex changing memory layout.
1080  */
1081 static void save_iotlb_data(CPUState *cs, hwaddr addr,
1082                             MemoryRegionSection *section, hwaddr mr_offset)
1083 {
1084 #ifdef CONFIG_PLUGIN
1085     SavedIOTLB *saved = &cs->saved_iotlb;
1086     saved->addr = addr;
1087     saved->section = section;
1088     saved->mr_offset = mr_offset;
1089 #endif
1090 }
1091
1092 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1093                       int mmu_idx, uint64_t val, target_ulong addr,
1094                       uintptr_t retaddr, MemOp op)
1095 {
1096     CPUState *cpu = env_cpu(env);
1097     hwaddr mr_offset;
1098     MemoryRegionSection *section;
1099     MemoryRegion *mr;
1100     bool locked = false;
1101     MemTxResult r;
1102
1103     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1104     mr = section->mr;
1105     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1106     if (!cpu->can_do_io) {
1107         cpu_io_recompile(cpu, retaddr);
1108     }
1109     cpu->mem_io_pc = retaddr;
1110
1111     /*
1112      * The memory_region_dispatch may trigger a flush/resize
1113      * so for plugins we save the iotlb_data just in case.
1114      */
1115     save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
1116
1117     if (!qemu_mutex_iothread_locked()) {
1118         qemu_mutex_lock_iothread();
1119         locked = true;
1120     }
1121     r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
1122     if (r != MEMTX_OK) {
1123         hwaddr physaddr = mr_offset +
1124             section->offset_within_address_space -
1125             section->offset_within_region;
1126
1127         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
1128                                MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
1129                                retaddr);
1130     }
1131     if (locked) {
1132         qemu_mutex_unlock_iothread();
1133     }
1134 }
1135
1136 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
1137 {
1138 #if TCG_OVERSIZED_GUEST
1139     return *(target_ulong *)((uintptr_t)entry + ofs);
1140 #else
1141     /* ofs might correspond to .addr_write, so use qatomic_read */
1142     return qatomic_read((target_ulong *)((uintptr_t)entry + ofs));
1143 #endif
1144 }
1145
1146 /* Return true if ADDR is present in the victim tlb, and has been copied
1147    back to the main tlb.  */
1148 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1149                            size_t elt_ofs, target_ulong page)
1150 {
1151     size_t vidx;
1152
1153     assert_cpu_is_self(env_cpu(env));
1154     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1155         CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1156         target_ulong cmp;
1157
1158         /* elt_ofs might correspond to .addr_write, so use qatomic_read */
1159 #if TCG_OVERSIZED_GUEST
1160         cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1161 #else
1162         cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1163 #endif
1164
1165         if (cmp == page) {
1166             /* Found entry in victim tlb, swap tlb and iotlb.  */
1167             CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1168
1169             qemu_spin_lock(&env_tlb(env)->c.lock);
1170             copy_tlb_helper_locked(&tmptlb, tlb);
1171             copy_tlb_helper_locked(tlb, vtlb);
1172             copy_tlb_helper_locked(vtlb, &tmptlb);
1173             qemu_spin_unlock(&env_tlb(env)->c.lock);
1174
1175             CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1176             CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1177             tmpio = *io; *io = *vio; *vio = tmpio;
1178             return true;
1179         }
1180     }
1181     return false;
1182 }
1183
/*
 * Macro to call victim_tlb_hit() with local variables from the use
 * context: the expansion refers to `env', `mmu_idx' and `index' by name,
 * so all three must be in scope at the call site.  TY names the
 * CPUTLBEntry comparator field to match against (e.g. addr_code or
 * addr_write); ADDR is masked down to its page before the lookup.
 */
#define VICTIM_TLB_HIT(TY, ADDR) \
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)
1188
1189 /*
1190  * Return a ram_addr_t for the virtual address for execution.
1191  *
1192  * Return -1 if we can't translate and execute from an entire page
1193  * of RAM.  This will force us to execute by loading and translating
1194  * one insn at a time, without caching.
1195  *
1196  * NOTE: This function will trigger an exception if the page is
1197  * not executable.
1198  */
1199 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1200                                         void **hostp)
1201 {
1202     uintptr_t mmu_idx = cpu_mmu_index(env, true);
1203     uintptr_t index = tlb_index(env, mmu_idx, addr);
1204     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1205     void *p;
1206
1207     if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1208         if (!VICTIM_TLB_HIT(addr_code, addr)) {
1209             tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1210             index = tlb_index(env, mmu_idx, addr);
1211             entry = tlb_entry(env, mmu_idx, addr);
1212
1213             if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1214                 /*
1215                  * The MMU protection covers a smaller range than a target
1216                  * page, so we must redo the MMU check for every insn.
1217                  */
1218                 return -1;
1219             }
1220         }
1221         assert(tlb_hit(entry->addr_code, addr));
1222     }
1223
1224     if (unlikely(entry->addr_code & TLB_MMIO)) {
1225         /* The region is not backed by RAM.  */
1226         if (hostp) {
1227             *hostp = NULL;
1228         }
1229         return -1;
1230     }
1231
1232     p = (void *)((uintptr_t)addr + entry->addend);
1233     if (hostp) {
1234         *hostp = p;
1235     }
1236     return qemu_ram_addr_from_host_nofail(p);
1237 }
1238
1239 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1240 {
1241     return get_page_addr_code_hostp(env, addr, NULL);
1242 }
1243
1244 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1245                            CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1246 {
1247     ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1248
1249     trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1250
1251     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1252         struct page_collection *pages
1253             = page_collection_lock(ram_addr, ram_addr + size);
1254         tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1255         page_collection_unlock(pages);
1256     }
1257
1258     /*
1259      * Set both VGA and migration bits for simplicity and to remove
1260      * the notdirty callback faster.
1261      */
1262     cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1263
1264     /* We remove the notdirty callback only if the code has been flushed. */
1265     if (!cpu_physical_memory_is_clean(ram_addr)) {
1266         trace_memory_notdirty_set_dirty(mem_vaddr);
1267         tlb_set_dirty(cpu, mem_vaddr);
1268     }
1269 }
1270
1271 static int probe_access_internal(CPUArchState *env, target_ulong addr,
1272                                  int fault_size, MMUAccessType access_type,
1273                                  int mmu_idx, bool nonfault,
1274                                  void **phost, uintptr_t retaddr)
1275 {
1276     uintptr_t index = tlb_index(env, mmu_idx, addr);
1277     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1278     target_ulong tlb_addr, page_addr;
1279     size_t elt_ofs;
1280     int flags;
1281
1282     switch (access_type) {
1283     case MMU_DATA_LOAD:
1284         elt_ofs = offsetof(CPUTLBEntry, addr_read);
1285         break;
1286     case MMU_DATA_STORE:
1287         elt_ofs = offsetof(CPUTLBEntry, addr_write);
1288         break;
1289     case MMU_INST_FETCH:
1290         elt_ofs = offsetof(CPUTLBEntry, addr_code);
1291         break;
1292     default:
1293         g_assert_not_reached();
1294     }
1295     tlb_addr = tlb_read_ofs(entry, elt_ofs);
1296
1297     page_addr = addr & TARGET_PAGE_MASK;
1298     if (!tlb_hit_page(tlb_addr, page_addr)) {
1299         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
1300             CPUState *cs = env_cpu(env);
1301             CPUClass *cc = CPU_GET_CLASS(cs);
1302
1303             if (!cc->tlb_fill(cs, addr, fault_size, access_type,
1304                               mmu_idx, nonfault, retaddr)) {
1305                 /* Non-faulting page table read failed.  */
1306                 *phost = NULL;
1307                 return TLB_INVALID_MASK;
1308             }
1309
1310             /* TLB resize via tlb_fill may have moved the entry.  */
1311             entry = tlb_entry(env, mmu_idx, addr);
1312         }
1313         tlb_addr = tlb_read_ofs(entry, elt_ofs);
1314     }
1315     flags = tlb_addr & TLB_FLAGS_MASK;
1316
1317     /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
1318     if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1319         *phost = NULL;
1320         return TLB_MMIO;
1321     }
1322
1323     /* Everything else is RAM. */
1324     *phost = (void *)((uintptr_t)addr + entry->addend);
1325     return flags;
1326 }
1327
1328 int probe_access_flags(CPUArchState *env, target_ulong addr,
1329                        MMUAccessType access_type, int mmu_idx,
1330                        bool nonfault, void **phost, uintptr_t retaddr)
1331 {
1332     int flags;
1333
1334     flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
1335                                   nonfault, phost, retaddr);
1336
1337     /* Handle clean RAM pages.  */
1338     if (unlikely(flags & TLB_NOTDIRTY)) {
1339         uintptr_t index = tlb_index(env, mmu_idx, addr);
1340         CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1341
1342         notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1343         flags &= ~TLB_NOTDIRTY;
1344     }
1345
1346     return flags;
1347 }
1348
1349 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1350                    MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1351 {
1352     void *host;
1353     int flags;
1354
1355     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1356
1357     flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
1358                                   false, &host, retaddr);
1359
1360     /* Per the interface, size == 0 merely faults the access. */
1361     if (size == 0) {
1362         return NULL;
1363     }
1364
1365     if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
1366         uintptr_t index = tlb_index(env, mmu_idx, addr);
1367         CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1368
1369         /* Handle watchpoints.  */
1370         if (flags & TLB_WATCHPOINT) {
1371             int wp_access = (access_type == MMU_DATA_STORE
1372                              ? BP_MEM_WRITE : BP_MEM_READ);
1373             cpu_check_watchpoint(env_cpu(env), addr, size,
1374                                  iotlbentry->attrs, wp_access, retaddr);
1375         }
1376
1377         /* Handle clean RAM pages.  */
1378         if (flags & TLB_NOTDIRTY) {
1379             notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1380         }
1381     }
1382
1383     return host;
1384 }
1385
1386 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1387                         MMUAccessType access_type, int mmu_idx)
1388 {
1389     void *host;
1390     int flags;
1391
1392     flags = probe_access_internal(env, addr, 0, access_type,
1393                                   mmu_idx, true, &host, 0);
1394
1395     /* No combination of flags are expected by the caller. */
1396     return flags ? NULL : host;
1397 }
1398
1399 #ifdef CONFIG_PLUGIN
1400 /*
1401  * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1402  * This should be a hot path as we will have just looked this path up
1403  * in the softmmu lookup code (or helper). We don't handle re-fills or
1404  * checking the victim table. This is purely informational.
1405  *
1406  * This almost never fails as the memory access being instrumented
1407  * should have just filled the TLB. The one corner case is io_writex
1408  * which can cause TLB flushes and potential resizing of the TLBs
1409  * losing the information we need. In those cases we need to recover
1410  * data from a copy of the iotlbentry. As long as this always occurs
1411  * from the same thread (which a mem callback will be) this is safe.
1412  */
1413
1414 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
1415                        bool is_store, struct qemu_plugin_hwaddr *data)
1416 {
1417     CPUArchState *env = cpu->env_ptr;
1418     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1419     uintptr_t index = tlb_index(env, mmu_idx, addr);
1420     target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
1421
1422     if (likely(tlb_hit(tlb_addr, addr))) {
1423         /* We must have an iotlb entry for MMIO */
1424         if (tlb_addr & TLB_MMIO) {
1425             CPUIOTLBEntry *iotlbentry;
1426             iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1427             data->is_io = true;
1428             data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1429             data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1430         } else {
1431             data->is_io = false;
1432             data->v.ram.hostaddr = addr + tlbe->addend;
1433         }
1434         return true;
1435     } else {
1436         SavedIOTLB *saved = &cpu->saved_iotlb;
1437         data->is_io = true;
1438         data->v.io.section = saved->section;
1439         data->v.io.offset = saved->mr_offset;
1440         return true;
1441     }
1442 }
1443
1444 #endif
1445
1446 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
1447  * operations, or io operations to proceed.  Return the host address.  */
1448 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1449                                TCGMemOpIdx oi, uintptr_t retaddr)
1450 {
1451     size_t mmu_idx = get_mmuidx(oi);
1452     uintptr_t index = tlb_index(env, mmu_idx, addr);
1453     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1454     target_ulong tlb_addr = tlb_addr_write(tlbe);
1455     MemOp mop = get_memop(oi);
1456     int a_bits = get_alignment_bits(mop);
1457     int s_bits = mop & MO_SIZE;
1458     void *hostaddr;
1459
1460     /* Adjust the given return address.  */
1461     retaddr -= GETPC_ADJ;
1462
1463     /* Enforce guest required alignment.  */
1464     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1465         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1466         cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1467                              mmu_idx, retaddr);
1468     }
1469
1470     /* Enforce qemu required alignment.  */
1471     if (unlikely(addr & ((1 << s_bits) - 1))) {
1472         /* We get here if guest alignment was not requested,
1473            or was not enforced by cpu_unaligned_access above.
1474            We might widen the access and emulate, but for now
1475            mark an exception and exit the cpu loop.  */
1476         goto stop_the_world;
1477     }
1478
1479     /* Check TLB entry and enforce page permissions.  */
1480     if (!tlb_hit(tlb_addr, addr)) {
1481         if (!VICTIM_TLB_HIT(addr_write, addr)) {
1482             tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1483                      mmu_idx, retaddr);
1484             index = tlb_index(env, mmu_idx, addr);
1485             tlbe = tlb_entry(env, mmu_idx, addr);
1486         }
1487         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1488     }
1489
1490     /* Notice an IO access or a needs-MMU-lookup access */
1491     if (unlikely(tlb_addr & TLB_MMIO)) {
1492         /* There's really nothing that can be done to
1493            support this apart from stop-the-world.  */
1494         goto stop_the_world;
1495     }
1496
1497     /* Let the guest notice RMW on a write-only page.  */
1498     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1499         tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1500                  mmu_idx, retaddr);
1501         /* Since we don't support reads and writes to different addresses,
1502            and we do have the proper page loaded for write, this shouldn't
1503            ever return.  But just in case, handle via stop-the-world.  */
1504         goto stop_the_world;
1505     }
1506
1507     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1508
1509     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1510         notdirty_write(env_cpu(env), addr, 1 << s_bits,
1511                        &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1512     }
1513
1514     return hostaddr;
1515
1516  stop_the_world:
1517     cpu_loop_exit_atomic(env_cpu(env), retaddr);
1518 }
1519
1520 /*
1521  * Load Helpers
1522  *
1523  * We support two different access types. SOFTMMU_CODE_ACCESS is
1524  * specifically for reading instructions from system memory. It is
1525  * called by the translation loop and in some helpers where the code
1526  * is disassembled. It shouldn't be called directly by guest code.
1527  */
1528
/*
 * Signature shared by the full_*_mmu load functions.  load_helper()
 * receives one of these as its full_load argument and calls it for each
 * of the two aligned loads that make up a slow unaligned access.
 */
typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);
1531
/*
 * Load from host address @haddr using the accessor that matches @op
 * (size and endianness).  The 32-bit results are cast to uint32_t
 * before being widened to the uint64_t return type.
 *
 * Any other @op value reaches qemu_build_not_reached(); the function is
 * always inlined, and load_helper() describes that call as a build-time
 * assert.
 */
static inline uint64_t QEMU_ALWAYS_INLINE
load_memop(const void *haddr, MemOp op)
{
    switch (op) {
    case MO_UB:
        return ldub_p(haddr);
    case MO_BEUW:
        return lduw_be_p(haddr);
    case MO_LEUW:
        return lduw_le_p(haddr);
    case MO_BEUL:
        return (uint32_t)ldl_be_p(haddr);
    case MO_LEUL:
        return (uint32_t)ldl_le_p(haddr);
    case MO_BEQ:
        return ldq_be_p(haddr);
    case MO_LEQ:
        return ldq_le_p(haddr);
    default:
        qemu_build_not_reached();
    }
}
1554
/*
 * Common implementation of the softmmu load helpers.
 *
 * @env:       CPU architecture state
 * @addr:      guest virtual address to load from
 * @oi:        combined memop / mmu index; the mmu index and the alignment
 *             requirement are extracted from it
 * @retaddr:   host return address, forwarded to the fault, watchpoint and
 *             I/O paths
 * @op:        operation to perform; the callers in this file pass a
 *             constant
 * @code_read: true to use the addr_code comparator (MMU_INST_FETCH),
 *             false to use addr_read (MMU_DATA_LOAD)
 * @full_load: the caller's own full_*_mmu function, used to perform the
 *             two aligned loads of a slow unaligned access
 *
 * Returns the loaded value.
 */
static inline uint64_t QEMU_ALWAYS_INLINE
load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
            uintptr_t retaddr, MemOp op, bool code_read,
            FullLoadHelper *full_load)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
    const size_t tlb_off = code_read ?
        offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
    const MMUAccessType access_type =
        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    uint64_t res;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, access_type,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size,
                     access_type, mmu_idx, retaddr);
            /* tlb_fill may have resized the TLB; redo both lookups.  */
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = code_read ? entry->addr_code : entry->addr_read;
        /* The entry was just filled or swapped in; ignore the invalid bit. */
        tlb_addr &= ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through full_load.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
        }

        /* Single-byte accesses never need swapping.  */
        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (likely(tlb_addr & TLB_MMIO)) {
            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
                            access_type, op ^ (need_swap * MO_BSWAP));
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two load_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            return load_memop(haddr, op ^ MO_BSWAP);
        }
        return load_memop(haddr, op);
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint64_t r1, r2;
        unsigned shift;
    do_unaligned_access:
        /* Two size-aligned loads that together cover the access.  */
        addr1 = addr & ~((target_ulong)size - 1);
        addr2 = addr1 + size;
        r1 = full_load(env, addr1, oi, retaddr);
        r2 = full_load(env, addr2, oi, retaddr);
        /* Number of bits by which addr is offset from addr1.  */
        shift = (addr & (size - 1)) * 8;

        if (memop_big_endian(op)) {
            /* Big-endian combine.  */
            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
        } else {
            /* Little-endian combine.  */
            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
        }
        /* Keep only the low size * 8 bits of the combined value.  */
        return res & MAKE_64BIT_MASK(0, size * 8);
    }

    /* Fast path: plain RAM, no flags, within one page.  */
    haddr = (void *)((uintptr_t)addr + entry->addend);
    return load_memop(haddr, op);
}
1659
1660 /*
1661  * For the benefit of TCG generated code, we want to avoid the
1662  * complication of ABI-specific return type promotion and always
1663  * return a value extended to the register size of the host. This is
1664  * tcg_target_long, except in the case of a 32-bit host and 64-bit
1665  * data, and for that we always have uint64_t.
1666  *
1667  * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1668  */
1669
1670 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1671                               TCGMemOpIdx oi, uintptr_t retaddr)
1672 {
1673     return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1674 }
1675
1676 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1677                                      TCGMemOpIdx oi, uintptr_t retaddr)
1678 {
1679     return full_ldub_mmu(env, addr, oi, retaddr);
1680 }
1681
1682 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1683                                  TCGMemOpIdx oi, uintptr_t retaddr)
1684 {
1685     return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1686                        full_le_lduw_mmu);
1687 }
1688
1689 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1690                                     TCGMemOpIdx oi, uintptr_t retaddr)
1691 {
1692     return full_le_lduw_mmu(env, addr, oi, retaddr);
1693 }
1694
1695 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1696                                  TCGMemOpIdx oi, uintptr_t retaddr)
1697 {
1698     return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1699                        full_be_lduw_mmu);
1700 }
1701
1702 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1703                                     TCGMemOpIdx oi, uintptr_t retaddr)
1704 {
1705     return full_be_lduw_mmu(env, addr, oi, retaddr);
1706 }
1707
1708 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1709                                  TCGMemOpIdx oi, uintptr_t retaddr)
1710 {
1711     return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1712                        full_le_ldul_mmu);
1713 }
1714
1715 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1716                                     TCGMemOpIdx oi, uintptr_t retaddr)
1717 {
1718     return full_le_ldul_mmu(env, addr, oi, retaddr);
1719 }
1720
1721 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1722                                  TCGMemOpIdx oi, uintptr_t retaddr)
1723 {
1724     return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1725                        full_be_ldul_mmu);
1726 }
1727
1728 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1729                                     TCGMemOpIdx oi, uintptr_t retaddr)
1730 {
1731     return full_be_ldul_mmu(env, addr, oi, retaddr);
1732 }
1733
1734 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1735                            TCGMemOpIdx oi, uintptr_t retaddr)
1736 {
1737     return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1738                        helper_le_ldq_mmu);
1739 }
1740
1741 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1742                            TCGMemOpIdx oi, uintptr_t retaddr)
1743 {
1744     return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
1745                        helper_be_ldq_mmu);
1746 }
1747
1748 /*
1749  * Provide signed versions of the load routines as well.  We can of course
1750  * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
1751  */
1752
1753
1754 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
1755                                      TCGMemOpIdx oi, uintptr_t retaddr)
1756 {
1757     return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
1758 }
1759
1760 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
1761                                     TCGMemOpIdx oi, uintptr_t retaddr)
1762 {
1763     return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
1764 }
1765
1766 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
1767                                     TCGMemOpIdx oi, uintptr_t retaddr)
1768 {
1769     return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
1770 }
1771
1772 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
1773                                     TCGMemOpIdx oi, uintptr_t retaddr)
1774 {
1775     return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
1776 }
1777
1778 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
1779                                     TCGMemOpIdx oi, uintptr_t retaddr)
1780 {
1781     return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
1782 }
1783
1784 /*
1785  * Load helpers for cpu_ldst.h.
1786  */
1787
1788 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
1789                                        int mmu_idx, uintptr_t retaddr,
1790                                        MemOp op, FullLoadHelper *full_load)
1791 {
1792     uint16_t meminfo;
1793     TCGMemOpIdx oi;
1794     uint64_t ret;
1795
1796     meminfo = trace_mem_get_info(op, mmu_idx, false);
1797     trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
1798
1799     op &= ~MO_SIGN;
1800     oi = make_memop_idx(op, mmu_idx);
1801     ret = full_load(env, addr, oi, retaddr);
1802
1803     qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
1804
1805     return ret;
1806 }
1807
1808 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1809                             int mmu_idx, uintptr_t ra)
1810 {
1811     return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
1812 }
1813
1814 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1815                        int mmu_idx, uintptr_t ra)
1816 {
1817     return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
1818                                    full_ldub_mmu);
1819 }
1820
1821 uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1822                                int mmu_idx, uintptr_t ra)
1823 {
1824     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu);
1825 }
1826
1827 int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1828                           int mmu_idx, uintptr_t ra)
1829 {
1830     return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW,
1831                                     full_be_lduw_mmu);
1832 }
1833
1834 uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1835                               int mmu_idx, uintptr_t ra)
1836 {
1837     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu);
1838 }
1839
1840 uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1841                               int mmu_idx, uintptr_t ra)
1842 {
1843     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu);
1844 }
1845
1846 uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1847                                int mmu_idx, uintptr_t ra)
1848 {
1849     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu);
1850 }
1851
1852 int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1853                           int mmu_idx, uintptr_t ra)
1854 {
1855     return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW,
1856                                     full_le_lduw_mmu);
1857 }
1858
1859 uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1860                               int mmu_idx, uintptr_t ra)
1861 {
1862     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu);
1863 }
1864
1865 uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1866                               int mmu_idx, uintptr_t ra)
1867 {
1868     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
1869 }
1870
1871 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
1872                           uintptr_t retaddr)
1873 {
1874     return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1875 }
1876
1877 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1878 {
1879     return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1880 }
1881
1882 uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
1883                              uintptr_t retaddr)
1884 {
1885     return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1886 }
1887
1888 int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1889 {
1890     return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1891 }
1892
1893 uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
1894                             uintptr_t retaddr)
1895 {
1896     return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1897 }
1898
1899 uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
1900                             uintptr_t retaddr)
1901 {
1902     return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1903 }
1904
1905 uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
1906                              uintptr_t retaddr)
1907 {
1908     return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1909 }
1910
1911 int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1912 {
1913     return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1914 }
1915
1916 uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
1917                             uintptr_t retaddr)
1918 {
1919     return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1920 }
1921
1922 uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
1923                             uintptr_t retaddr)
1924 {
1925     return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1926 }
1927
1928 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
1929 {
1930     return cpu_ldub_data_ra(env, ptr, 0);
1931 }
1932
1933 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
1934 {
1935     return cpu_ldsb_data_ra(env, ptr, 0);
1936 }
1937
1938 uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
1939 {
1940     return cpu_lduw_be_data_ra(env, ptr, 0);
1941 }
1942
1943 int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
1944 {
1945     return cpu_ldsw_be_data_ra(env, ptr, 0);
1946 }
1947
1948 uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
1949 {
1950     return cpu_ldl_be_data_ra(env, ptr, 0);
1951 }
1952
1953 uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
1954 {
1955     return cpu_ldq_be_data_ra(env, ptr, 0);
1956 }
1957
1958 uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
1959 {
1960     return cpu_lduw_le_data_ra(env, ptr, 0);
1961 }
1962
1963 int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
1964 {
1965     return cpu_ldsw_le_data_ra(env, ptr, 0);
1966 }
1967
1968 uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
1969 {
1970     return cpu_ldl_le_data_ra(env, ptr, 0);
1971 }
1972
1973 uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
1974 {
1975     return cpu_ldq_le_data_ra(env, ptr, 0);
1976 }
1977
1978 /*
1979  * Store Helpers
1980  */
1981
1982 static inline void QEMU_ALWAYS_INLINE
1983 store_memop(void *haddr, uint64_t val, MemOp op)
1984 {
1985     switch (op) {
1986     case MO_UB:
1987         stb_p(haddr, val);
1988         break;
1989     case MO_BEUW:
1990         stw_be_p(haddr, val);
1991         break;
1992     case MO_LEUW:
1993         stw_le_p(haddr, val);
1994         break;
1995     case MO_BEUL:
1996         stl_be_p(haddr, val);
1997         break;
1998     case MO_LEUL:
1999         stl_le_p(haddr, val);
2000         break;
2001     case MO_BEQ:
2002         stq_be_p(haddr, val);
2003         break;
2004     case MO_LEQ:
2005         stq_le_p(haddr, val);
2006         break;
2007     default:
2008         qemu_build_not_reached();
2009     }
2010 }
2011
/*
 * Out-of-line slow path of store_helper for stores that cannot be done
 * with a single host access.
 *
 * The store is carried out one byte at a time through helper_ret_stb_mmu,
 * in ascending address order.  Before any byte is written, the TLB entry
 * for the page containing (addr + size) is filled if necessary and the
 * watchpoint checks for both entries are performed.
 *
 * @val holds the value to store in its low @size bytes; @big_endian
 * selects which end of it is written first.
 *
 * NOTE(review): store_helper also jumps here for an unaligned access that
 * has TLB flag bits set but does not cross a page.  In that case page2 is
 * the same page as @addr and size2 exceeds @size, so the "size - size2"
 * length passed to the first watchpoint check wraps around -- confirm
 * this is intended.
 */
static void __attribute__((noinline))
store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
                       uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
                       bool big_endian)
{
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    uintptr_t index, index2;
    CPUTLBEntry *entry, *entry2;
    target_ulong page2, tlb_addr, tlb_addr2;
    TCGMemOpIdx oi;
    size_t size2;
    int i;

    /*
     * Ensure the second page is in the TLB.  Note that the first page
     * is already guaranteed to be filled, and that the second page
     * cannot evict the first.
     */
    /* page2: page holding the byte just past the end of the access. */
    page2 = (addr + size) & TARGET_PAGE_MASK;
    /* size2: offset of (addr + size) within page2. */
    size2 = (addr + size) & ~TARGET_PAGE_MASK;
    index2 = tlb_index(env, mmu_idx, page2);
    entry2 = tlb_entry(env, mmu_idx, page2);

    tlb_addr2 = tlb_addr_write(entry2);
    if (!tlb_hit_page(tlb_addr2, page2)) {
        if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
            tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            /* Look the entry up again after the fill. */
            index2 = tlb_index(env, mmu_idx, page2);
            entry2 = tlb_entry(env, mmu_idx, page2);
        }
        tlb_addr2 = tlb_addr_write(entry2);
    }

    /* Fetch the entry for the first page (only after page2 is resolved). */
    index = tlb_index(env, mmu_idx, addr);
    entry = tlb_entry(env, mmu_idx, addr);
    tlb_addr = tlb_addr_write(entry);

    /*
     * Handle watchpoints.  Since this may trap, all checks
     * must happen before any store.
     */
    if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
        /* First part: from addr, length size - size2. */
        cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                             env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                             BP_MEM_WRITE, retaddr);
    }
    if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
        /* Second part: from the start of page2, length size2. */
        cpu_check_watchpoint(env_cpu(env), page2, size2,
                             env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                             BP_MEM_WRITE, retaddr);
    }

    /*
     * XXX: not efficient, but simple.
     * This loop must go in the forward direction to avoid issues
     * with self-modifying code in Windows 64-bit.
     */
    oi = make_memop_idx(MO_UB, mmu_idx);
    if (big_endian) {
        for (i = 0; i < size; ++i) {
            /* Big-endian extract.  */
            /* Byte i comes from bit offset (size - 1 - i) * 8 of val. */
            uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
    } else {
        for (i = 0; i < size; ++i) {
            /* Little-endian extract.  */
            /* Byte i comes from bit offset i * 8 of val. */
            uint8_t val8 = val >> (i * 8);
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
    }
}
2085
/*
 * Common body of every softmmu store helper in this file.
 *
 * @env, @addr, @val: CPU state, guest virtual address, value to store.
 * @oi:      combined memop/mmu index; supplies the mmu index and the
 *           alignment requirement.
 * @retaddr: passed through to the fault, watchpoint and I/O paths.
 * @op:      size and endianness of the store; each caller in this file
 *           passes a constant, which store_memop relies on.
 *
 * Order of operations: alignment check, TLB lookup (victim TLB, then
 * tlb_fill), then one of: the flagged slow path (watchpoint, MMIO,
 * discard-write, not-dirty), the byte-wise unaligned path, or a direct
 * host store.
 */
static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    /* Any of the low a_bits address bits set means the access is misaligned. */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            /* Look the entry up again after the fill. */
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        /* Re-read the comparator, dropping the invalid flag. */
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    /* Bits of tlb_addr below the page mask are the TLB_* flags. */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        /* A byte swap is meaningless for single-byte stores. */
        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (tlb_addr & TLB_MMIO) {
            /* XOR with MO_BSWAP flips the endianness only when need_swap. */
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM.  */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        /* Host address = guest address plus the entry's addend. */
        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    /* True when the last byte of the access lies beyond the current page. */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                     >= TARGET_PAGE_SIZE)) {
    do_unaligned_access:
        /* Also the target of the goto in the flagged path above. */
        store_helper_unaligned(env, addr, val, retaddr, size,
                               mmu_idx, memop_big_endian(op));
        return;
    }

    /* Fast path: unflagged page, access contained in it. */
    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}
2183
2184 void __attribute__((noinline))
2185 helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
2186                    TCGMemOpIdx oi, uintptr_t retaddr)
2187 {
2188     store_helper(env, addr, val, oi, retaddr, MO_UB);
2189 }
2190
2191 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2192                        TCGMemOpIdx oi, uintptr_t retaddr)
2193 {
2194     store_helper(env, addr, val, oi, retaddr, MO_LEUW);
2195 }
2196
2197 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2198                        TCGMemOpIdx oi, uintptr_t retaddr)
2199 {
2200     store_helper(env, addr, val, oi, retaddr, MO_BEUW);
2201 }
2202
2203 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2204                        TCGMemOpIdx oi, uintptr_t retaddr)
2205 {
2206     store_helper(env, addr, val, oi, retaddr, MO_LEUL);
2207 }
2208
2209 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2210                        TCGMemOpIdx oi, uintptr_t retaddr)
2211 {
2212     store_helper(env, addr, val, oi, retaddr, MO_BEUL);
2213 }
2214
2215 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2216                        TCGMemOpIdx oi, uintptr_t retaddr)
2217 {
2218     store_helper(env, addr, val, oi, retaddr, MO_LEQ);
2219 }
2220
2221 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2222                        TCGMemOpIdx oi, uintptr_t retaddr)
2223 {
2224     store_helper(env, addr, val, oi, retaddr, MO_BEQ);
2225 }
2226
2227 /*
2228  * Store Helpers for cpu_ldst.h
2229  */
2230
2231 static inline void QEMU_ALWAYS_INLINE
2232 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2233                  int mmu_idx, uintptr_t retaddr, MemOp op)
2234 {
2235     TCGMemOpIdx oi;
2236     uint16_t meminfo;
2237
2238     meminfo = trace_mem_get_info(op, mmu_idx, true);
2239     trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
2240
2241     oi = make_memop_idx(op, mmu_idx);
2242     store_helper(env, addr, val, oi, retaddr, op);
2243
2244     qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
2245 }
2246
2247 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2248                        int mmu_idx, uintptr_t retaddr)
2249 {
2250     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
2251 }
2252
2253 void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2254                           int mmu_idx, uintptr_t retaddr)
2255 {
2256     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
2257 }
2258
2259 void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2260                           int mmu_idx, uintptr_t retaddr)
2261 {
2262     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
2263 }
2264
2265 void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2266                           int mmu_idx, uintptr_t retaddr)
2267 {
2268     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
2269 }
2270
2271 void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2272                           int mmu_idx, uintptr_t retaddr)
2273 {
2274     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
2275 }
2276
2277 void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2278                           int mmu_idx, uintptr_t retaddr)
2279 {
2280     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
2281 }
2282
2283 void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2284                           int mmu_idx, uintptr_t retaddr)
2285 {
2286     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
2287 }
2288
2289 void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
2290                      uint32_t val, uintptr_t retaddr)
2291 {
2292     cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2293 }
2294
2295 void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
2296                         uint32_t val, uintptr_t retaddr)
2297 {
2298     cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2299 }
2300
2301 void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
2302                         uint32_t val, uintptr_t retaddr)
2303 {
2304     cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2305 }
2306
2307 void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
2308                         uint64_t val, uintptr_t retaddr)
2309 {
2310     cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2311 }
2312
2313 void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
2314                         uint32_t val, uintptr_t retaddr)
2315 {
2316     cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2317 }
2318
2319 void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
2320                         uint32_t val, uintptr_t retaddr)
2321 {
2322     cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2323 }
2324
2325 void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
2326                         uint64_t val, uintptr_t retaddr)
2327 {
2328     cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2329 }
2330
2331 void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2332 {
2333     cpu_stb_data_ra(env, ptr, val, 0);
2334 }
2335
2336 void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2337 {
2338     cpu_stw_be_data_ra(env, ptr, val, 0);
2339 }
2340
2341 void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2342 {
2343     cpu_stl_be_data_ra(env, ptr, val, 0);
2344 }
2345
2346 void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2347 {
2348     cpu_stq_be_data_ra(env, ptr, val, 0);
2349 }
2350
2351 void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2352 {
2353     cpu_stw_le_data_ra(env, ptr, val, 0);
2354 }
2355
2356 void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2357 {
2358     cpu_stl_le_data_ra(env, ptr, val, 0);
2359 }
2360
2361 void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2362 {
2363     cpu_stq_le_data_ra(env, ptr, val, 0);
2364 }
2365
/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */

/*
 * Parameters consumed by the atomic template includes below:
 *   EXTRA_ARGS         - trailing parameters appended to each helper.
 *   ATOMIC_NAME(X)     - helper name, built from X, SUFFIX and END with
 *                        an "_mmu" tail (SUFFIX and END are not defined
 *                        here; presumably the template sets them).
 *   ATOMIC_MMU_DECLS   - expands to nothing in this file.
 *   ATOMIC_MMU_LOOKUP  - address lookup using the explicit retaddr.
 *   ATOMIC_MMU_CLEANUP - expands to nothing in this file.
 *   ATOMIC_MMU_IDX     - mmu index extracted from oi.
 */
#define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX   get_mmuidx(oi)

#include "atomic_common.c.inc"

/*
 * One template inclusion per operand size in bytes.  DATA_SIZE is
 * redefined before each include without an #undef here, so the template
 * presumably undefines it itself -- confirm in atomic_template.h.
 */
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* 8-byte variants only when CONFIG_ATOMIC64 is defined. */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

/* 16-byte variants only when either 128-bit capability macro is nonzero. */
#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
2397
/* Second set of helpers are directly callable from TCG as helpers.  */

/*
 * Re-point the template parameters: these variants take no retaddr
 * argument and use GETPC() in the lookup instead, and their names lack
 * the "_mmu" tail.  ATOMIC_MMU_DECLS, ATOMIC_MMU_CLEANUP and
 * ATOMIC_MMU_IDX keep their earlier definitions.
 */
#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS         , TCGMemOpIdx oi
#define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC())

/* Sizes 1, 2 and 4 always; no 16-byte variant is instantiated in this set. */
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* 8-byte variants only when CONFIG_ATOMIC64 is defined. */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
/* Only ATOMIC_MMU_IDX is undefined after the last instantiation. */
#undef ATOMIC_MMU_IDX
2421
2422 /* Code access functions.  */
2423
2424 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
2425                                TCGMemOpIdx oi, uintptr_t retaddr)
2426 {
2427     return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
2428 }
2429
2430 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
2431 {
2432     TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
2433     return full_ldub_code(env, addr, oi, 0);
2434 }
2435
2436 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
2437                                TCGMemOpIdx oi, uintptr_t retaddr)
2438 {
2439     return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
2440 }
2441
2442 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
2443 {
2444     TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
2445     return full_lduw_code(env, addr, oi, 0);
2446 }
2447
2448 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
2449                               TCGMemOpIdx oi, uintptr_t retaddr)
2450 {
2451     return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
2452 }
2453
2454 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
2455 {
2456     TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
2457     return full_ldl_code(env, addr, oi, 0);
2458 }
2459
2460 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
2461                               TCGMemOpIdx oi, uintptr_t retaddr)
2462 {
2463     return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
2464 }
2465
2466 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
2467 {
2468     TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
2469     return full_ldq_code(env, addr, oi, 0);
2470 }