cputlb: Split out tlb_mmu_flush_locked
[qemu/ar7.git] / accel/tcg/cputlb.c
1 /*
2 * Common CPU TLB handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "translate-all.h"
37 #include "trace-root.h"
38 #include "trace/mem.h"
39 #ifdef CONFIG_PLUGIN
40 #include "qemu/plugin-memory.h"
41 #endif
43 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
44 /* #define DEBUG_TLB */
45 /* #define DEBUG_TLB_LOG */
47 #ifdef DEBUG_TLB
48 # define DEBUG_TLB_GATE 1
49 # ifdef DEBUG_TLB_LOG
50 # define DEBUG_TLB_LOG_GATE 1
51 # else
52 # define DEBUG_TLB_LOG_GATE 0
53 # endif
54 #else
55 # define DEBUG_TLB_GATE 0
56 # define DEBUG_TLB_LOG_GATE 0
57 #endif
59 #define tlb_debug(fmt, ...) do { \
60 if (DEBUG_TLB_LOG_GATE) { \
61 qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
62 ## __VA_ARGS__); \
63 } else if (DEBUG_TLB_GATE) { \
64 fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
65 } \
66 } while (0)
68 #define assert_cpu_is_self(cpu) do { \
69 if (DEBUG_TLB_GATE) { \
70 g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \
71 } \
72 } while (0)
74 /* run_on_cpu_data.target_ptr should always be big enough for a
75 * target_ulong even on 32 bit builds */
76 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
78 /* We currently can't handle more than 16 bits in the MMUIDX bitmask. */
80 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
81 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
83 static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
85 return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
88 static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
90 return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
93 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
94 size_t max_entries)
96 desc->window_begin_ns = ns;
97 desc->window_max_entries = max_entries;
100 static void tlb_dyn_init(CPUArchState *env)
102 int i;
104 for (i = 0; i < NB_MMU_MODES; i++) {
105 CPUTLBDesc *desc = &env_tlb(env)->d[i];
106 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
108 tlb_window_reset(desc, get_clock_realtime(), 0);
109 desc->n_used_entries = 0;
110 env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
111 env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
112 env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
117 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
118 * @desc: The CPUTLBDesc portion of the TLB
119 * @fast: The CPUTLBDescFast portion of the same TLB
121 * Called with tlb_c.lock held.
123 * We have two main constraints when resizing a TLB: (1) we only resize it
124 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
125 * the array or unnecessarily flushing it), which means we do not control how
126 * frequently the resizing can occur; (2) we don't have access to the guest's
127 * future scheduling decisions, and therefore have to decide the magnitude of
128 * the resize based on past observations.
130 * In general, a memory-hungry process can benefit greatly from an appropriately
131 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
132 * we just have to make the TLB as large as possible; while an oversized TLB
133 * results in minimal TLB miss rates, it also takes longer to be flushed
134 * (flushes can be _very_ frequent), and the reduced locality can also hurt
135 * performance.
137 * To achieve near-optimal performance for all kinds of workloads, we:
139 * 1. Aggressively increase the size of the TLB when the use rate of the
140 * TLB being flushed is high, since it is likely that in the near future this
141 * memory-hungry process will execute again, and its memory hungriness will
142 * probably be similar.
144 * 2. Slowly reduce the size of the TLB as the use rate declines over a
145 * reasonably large time window. The rationale is that if in such a time window
146 * we have not observed a high TLB use rate, it is likely that we won't observe
147 * it in the near future. In that case, once a time window expires we downsize
148 * the TLB to match the maximum use rate observed in the window.
150 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
151 * since in that range performance is likely near-optimal. Recall that the TLB
152 * is direct mapped, so we want the use rate to be low (or at least not too
153 * high), since otherwise we are likely to have a significant amount of
154 * conflict misses.
156 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
158 size_t old_size = tlb_n_entries(fast);
159 size_t rate;
160 size_t new_size = old_size;
161 int64_t now = get_clock_realtime();
162 int64_t window_len_ms = 100;
163 int64_t window_len_ns = window_len_ms * 1000 * 1000;
164 bool window_expired = now > desc->window_begin_ns + window_len_ns;
166 if (desc->n_used_entries > desc->window_max_entries) {
167 desc->window_max_entries = desc->n_used_entries;
169 rate = desc->window_max_entries * 100 / old_size;
171 if (rate > 70) {
172 new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
173 } else if (rate < 30 && window_expired) {
174 size_t ceil = pow2ceil(desc->window_max_entries);
175 size_t expected_rate = desc->window_max_entries * 100 / ceil;
178 * Avoid undersizing when the max number of entries seen is just below
179 * a pow2. For instance, if max_entries == 1025, the expected use rate
180 * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
181 * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
182 * later. Thus, make sure that the expected use rate remains below 70%.
183 * (and since we double the size, that means the lowest rate we'd
184 * expect to get is 35%, which is still in the 30-70% range where
185 * we consider that the size is appropriate.)
187 if (expected_rate > 70) {
188 ceil *= 2;
190 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
193 if (new_size == old_size) {
194 if (window_expired) {
195 tlb_window_reset(desc, now, desc->n_used_entries);
197 return;
200 g_free(fast->table);
201 g_free(desc->iotlb);
203 tlb_window_reset(desc, now, 0);
204 /* desc->n_used_entries is cleared by the caller */
205 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
206 fast->table = g_try_new(CPUTLBEntry, new_size);
207 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
210 * If the allocations fail, try smaller sizes. We just freed some
211 * memory, so going back to half of new_size has a good chance of working.
212 * Increased memory pressure elsewhere in the system might cause the
213 * allocations to fail though, so we progressively reduce the allocation
214 * size, aborting if we cannot even allocate the smallest TLB we support.
216 while (fast->table == NULL || desc->iotlb == NULL) {
217 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
218 error_report("%s: %s", __func__, strerror(errno));
219 abort();
221 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
222 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
224 g_free(fast->table);
225 g_free(desc->iotlb);
226 fast->table = g_try_new(CPUTLBEntry, new_size);
227 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
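/*
 * As a worked illustration of the policy above (the numbers are made up,
 * not taken from a real trace): with old_size == 1024 and
 * window_max_entries == 800, rate == 78, so the table doubles to 2048
 * entries (capped at 1 << CPU_TLB_DYN_MAX_BITS). Conversely, with
 * old_size == 8192 and window_max_entries == 900 in an expired window,
 * rate == 10; pow2ceil(900) == 1024 would give an expected rate of 87%,
 * so the target size is doubled to 2048 before being floored at
 * 1 << CPU_TLB_DYN_MIN_BITS.
 */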
231 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
233 desc->n_used_entries = 0;
234 desc->large_page_addr = -1;
235 desc->large_page_mask = -1;
236 desc->vindex = 0;
237 memset(fast->table, -1, sizeof_tlb(fast));
238 memset(desc->vtable, -1, sizeof(desc->vtable));
241 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
243 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
244 CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
246 tlb_mmu_resize_locked(desc, fast);
247 tlb_mmu_flush_locked(desc, fast);
250 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
252 env_tlb(env)->d[mmu_idx].n_used_entries++;
255 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
257 env_tlb(env)->d[mmu_idx].n_used_entries--;
260 void tlb_init(CPUState *cpu)
262 CPUArchState *env = cpu->env_ptr;
264 qemu_spin_init(&env_tlb(env)->c.lock);
266 /* Ensure that cpu_reset performs a full flush. */
267 env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
269 tlb_dyn_init(env);
272 /* flush_all_helper: run fn across all cpus
274 * If the wait flag is set then the src cpu's helper will be queued as
275 * "safe" work and the loop exited creating a synchronisation point
276 * where all queued work will be finished before execution starts
277 * again.
279 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
280 run_on_cpu_data d)
282 CPUState *cpu;
284 CPU_FOREACH(cpu) {
285 if (cpu != src) {
286 async_run_on_cpu(cpu, fn, d);
291 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
293 CPUState *cpu;
294 size_t full = 0, part = 0, elide = 0;
296 CPU_FOREACH(cpu) {
297 CPUArchState *env = cpu->env_ptr;
299 full += atomic_read(&env_tlb(env)->c.full_flush_count);
300 part += atomic_read(&env_tlb(env)->c.part_flush_count);
301 elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
303 *pfull = full;
304 *ppart = part;
305 *pelide = elide;
308 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
310 CPUArchState *env = cpu->env_ptr;
311 uint16_t asked = data.host_int;
312 uint16_t all_dirty, work, to_clean;
314 assert_cpu_is_self(cpu);
316 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
318 qemu_spin_lock(&env_tlb(env)->c.lock);
320 all_dirty = env_tlb(env)->c.dirty;
321 to_clean = asked & all_dirty;
322 all_dirty &= ~to_clean;
323 env_tlb(env)->c.dirty = all_dirty;
325 for (work = to_clean; work != 0; work &= work - 1) {
326 int mmu_idx = ctz32(work);
327 tlb_flush_one_mmuidx_locked(env, mmu_idx);
330 qemu_spin_unlock(&env_tlb(env)->c.lock);
332 cpu_tb_jmp_cache_clear(cpu);
334 if (to_clean == ALL_MMUIDX_BITS) {
335 atomic_set(&env_tlb(env)->c.full_flush_count,
336 env_tlb(env)->c.full_flush_count + 1);
337 } else {
338 atomic_set(&env_tlb(env)->c.part_flush_count,
339 env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
340 if (to_clean != asked) {
341 atomic_set(&env_tlb(env)->c.elide_flush_count,
342 env_tlb(env)->c.elide_flush_count +
343 ctpop16(asked & ~to_clean));
348 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
350 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
352 if (cpu->created && !qemu_cpu_is_self(cpu)) {
353 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
354 RUN_ON_CPU_HOST_INT(idxmap));
355 } else {
356 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
360 void tlb_flush(CPUState *cpu)
362 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
365 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
367 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
369 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
371 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
372 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
375 void tlb_flush_all_cpus(CPUState *src_cpu)
377 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
380 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
382 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
384 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
386 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
387 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
390 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
392 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
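/*
 * To summarise the three entry points above: tlb_flush() flushes a single
 * vCPU; tlb_flush_all_cpus() also queues the flush on every other vCPU via
 * flush_all_helper() while performing the local flush immediately; and
 * tlb_flush_all_cpus_synced() queues the local flush as "safe" work, so the
 * source vCPU creates a synchronisation point and does not start executing
 * again until all of the queued flushes have completed. For instance, a
 * target emulating a broadcast "invalidate entire TLB" instruction would
 * typically want the _synced variant in its helper.
 */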
395 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
396 target_ulong page)
398 return tlb_hit_page(tlb_entry->addr_read, page) ||
399 tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
400 tlb_hit_page(tlb_entry->addr_code, page);
404 * tlb_entry_is_empty - return true if the entry is not in use
405 * @te: pointer to CPUTLBEntry
407 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
409 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
412 /* Called with tlb_c.lock held */
413 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
414 target_ulong page)
416 if (tlb_hit_page_anyprot(tlb_entry, page)) {
417 memset(tlb_entry, -1, sizeof(*tlb_entry));
418 return true;
420 return false;
423 /* Called with tlb_c.lock held */
424 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
425 target_ulong page)
427 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
428 int k;
430 assert_cpu_is_self(env_cpu(env));
431 for (k = 0; k < CPU_VTLB_SIZE; k++) {
432 if (tlb_flush_entry_locked(&d->vtable[k], page)) {
433 tlb_n_used_entries_dec(env, mmu_idx);
438 static void tlb_flush_page_locked(CPUArchState *env, int midx,
439 target_ulong page)
441 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
442 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
444 /* Check if we need to flush due to large pages. */
445 if ((page & lp_mask) == lp_addr) {
446 tlb_debug("forcing full flush midx %d ("
447 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
448 midx, lp_addr, lp_mask);
449 tlb_flush_one_mmuidx_locked(env, midx);
450 } else {
451 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
452 tlb_n_used_entries_dec(env, midx);
454 tlb_flush_vtlb_page_locked(env, midx, page);
459 * tlb_flush_page_by_mmuidx_async_0:
460 * @cpu: cpu on which to flush
461 * @addr: page of virtual address to flush
462 * @idxmap: set of mmu_idx to flush
464 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
465 * at @addr from the tlbs indicated by @idxmap from @cpu.
467 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
468 target_ulong addr,
469 uint16_t idxmap)
471 CPUArchState *env = cpu->env_ptr;
472 int mmu_idx;
474 assert_cpu_is_self(cpu);
476 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
478 qemu_spin_lock(&env_tlb(env)->c.lock);
479 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
480 if ((idxmap >> mmu_idx) & 1) {
481 tlb_flush_page_locked(env, mmu_idx, addr);
484 qemu_spin_unlock(&env_tlb(env)->c.lock);
486 tb_flush_jmp_cache(cpu, addr);
490 * tlb_flush_page_by_mmuidx_async_1:
491 * @cpu: cpu on which to flush
492 * @data: encoded addr + idxmap
494 * Helper for tlb_flush_page_by_mmuidx and friends, called through
495 * async_run_on_cpu. The idxmap parameter is encoded in the page
496 * offset of the target_ptr field. This limits the set of mmu_idx
497 * that can be passed via this method.
499 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
500 run_on_cpu_data data)
502 target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
503 target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
504 uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
506 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
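/*
 * For illustration (assuming a 4K TARGET_PAGE_SIZE, which most targets
 * use): flushing page 0x7fff1000 for mmu_idx 0 and 1 (idxmap 0x3) is
 * encoded as target_ptr 0x7fff1003 and decoded above as addr 0x7fff1000
 * and idxmap 0x3. The encoding only works while idxmap < TARGET_PAGE_SIZE,
 * which the callers check before choosing this path.
 */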
509 typedef struct {
510 target_ulong addr;
511 uint16_t idxmap;
512 } TLBFlushPageByMMUIdxData;
515 * tlb_flush_page_by_mmuidx_async_2:
516 * @cpu: cpu on which to flush
517 * @data: allocated addr + idxmap
519 * Helper for tlb_flush_page_by_mmuidx and friends, called through
520 * async_run_on_cpu. The addr+idxmap parameters are stored in a
521 * TLBFlushPageByMMUIdxData structure that has been allocated
522 * specifically for this helper. Free the structure when done.
524 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
525 run_on_cpu_data data)
527 TLBFlushPageByMMUIdxData *d = data.host_ptr;
529 tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
530 g_free(d);
533 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
535 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
537 /* This should already be page aligned */
538 addr &= TARGET_PAGE_MASK;
540 if (qemu_cpu_is_self(cpu)) {
541 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
542 } else if (idxmap < TARGET_PAGE_SIZE) {
544 * Most targets have only a few mmu_idx. In the case where
545 * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
546 * allocating memory for this operation.
548 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
549 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
550 } else {
551 TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
553 /* Otherwise allocate a structure, freed by the worker. */
554 d->addr = addr;
555 d->idxmap = idxmap;
556 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
557 RUN_ON_CPU_HOST_PTR(d));
561 void tlb_flush_page(CPUState *cpu, target_ulong addr)
563 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
566 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
567 uint16_t idxmap)
569 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
571 /* This should already be page aligned */
572 addr &= TARGET_PAGE_MASK;
575 * Allocate memory to hold addr+idxmap only when needed.
576 * See tlb_flush_page_by_mmuidx for details.
578 if (idxmap < TARGET_PAGE_SIZE) {
579 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
580 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
581 } else {
582 CPUState *dst_cpu;
584 /* Allocate a separate data block for each destination cpu. */
585 CPU_FOREACH(dst_cpu) {
586 if (dst_cpu != src_cpu) {
587 TLBFlushPageByMMUIdxData *d
588 = g_new(TLBFlushPageByMMUIdxData, 1);
590 d->addr = addr;
591 d->idxmap = idxmap;
592 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
593 RUN_ON_CPU_HOST_PTR(d));
598 tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
601 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
603 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
606 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
607 target_ulong addr,
608 uint16_t idxmap)
610 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
612 /* This should already be page aligned */
613 addr &= TARGET_PAGE_MASK;
616 * Allocate memory to hold addr+idxmap only when needed.
617 * See tlb_flush_page_by_mmuidx for details.
619 if (idxmap < TARGET_PAGE_SIZE) {
620 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
621 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
622 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
623 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
624 } else {
625 CPUState *dst_cpu;
626 TLBFlushPageByMMUIdxData *d;
628 /* Allocate a separate data block for each destination cpu. */
629 CPU_FOREACH(dst_cpu) {
630 if (dst_cpu != src_cpu) {
631 d = g_new(TLBFlushPageByMMUIdxData, 1);
632 d->addr = addr;
633 d->idxmap = idxmap;
634 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
635 RUN_ON_CPU_HOST_PTR(d));
639 d = g_new(TLBFlushPageByMMUIdxData, 1);
640 d->addr = addr;
641 d->idxmap = idxmap;
642 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
643 RUN_ON_CPU_HOST_PTR(d));
647 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
649 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
652 /* update the TLBs so that writes to code in the virtual page 'addr'
653 can be detected */
654 void tlb_protect_code(ram_addr_t ram_addr)
656 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
657 DIRTY_MEMORY_CODE);
660 /* update the TLB so that writes in physical page 'ram_addr' are no longer
661 tested for self modifying code */
662 void tlb_unprotect_code(ram_addr_t ram_addr)
664 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
669 * Dirty write flag handling
671 * When the TCG code writes to a location it looks up the address in
672 * the TLB and uses that data to compute the final address. If any of
673 * the lower bits of the address are set then the slow path is forced.
674 * There are a number of reasons to do this but for normal RAM the
675 * most usual is detecting writes to code regions which may invalidate
676 * generated code.
678 * Other vCPUs might be reading their TLBs during guest execution, so we update
679 * te->addr_write with atomic_set. We don't need to worry about this for
680 * oversized guests as MTTCG is disabled for them.
682 * Called with tlb_c.lock held.
684 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
685 uintptr_t start, uintptr_t length)
687 uintptr_t addr = tlb_entry->addr_write;
689 if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
690 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
691 addr &= TARGET_PAGE_MASK;
692 addr += tlb_entry->addend;
693 if ((addr - start) < length) {
694 #if TCG_OVERSIZED_GUEST
695 tlb_entry->addr_write |= TLB_NOTDIRTY;
696 #else
697 atomic_set(&tlb_entry->addr_write,
698 tlb_entry->addr_write | TLB_NOTDIRTY);
699 #endif
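/*
 * Illustration of the scheme described above: the comparators in a
 * CPUTLBEntry are page aligned, so the bits below TARGET_PAGE_BITS are
 * free to carry flags. The generated fast path compares a page-masked
 * form of the access address against addr_write, so an entry holding
 * (page | TLB_NOTDIRTY) can never match exactly; that mismatch is what
 * forces the slow path, where notdirty_write() below handles the flag.
 */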
705 * Called with tlb_c.lock held.
706 * Called only from the vCPU context, i.e. the TLB's owner thread.
708 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
710 *d = *s;
713 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
714 * the target vCPU).
715 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
716 * thing actually updated is the target TLB entry ->addr_write flags.
718 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
720 CPUArchState *env;
722 int mmu_idx;
724 env = cpu->env_ptr;
725 qemu_spin_lock(&env_tlb(env)->c.lock);
726 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
727 unsigned int i;
728 unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
730 for (i = 0; i < n; i++) {
731 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
732 start1, length);
735 for (i = 0; i < CPU_VTLB_SIZE; i++) {
736 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
737 start1, length);
740 qemu_spin_unlock(&env_tlb(env)->c.lock);
743 /* Called with tlb_c.lock held */
744 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
745 target_ulong vaddr)
747 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
748 tlb_entry->addr_write = vaddr;
752 /* update the TLB corresponding to virtual page vaddr
753 so that it is no longer dirty */
754 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
756 CPUArchState *env = cpu->env_ptr;
757 int mmu_idx;
759 assert_cpu_is_self(cpu);
761 vaddr &= TARGET_PAGE_MASK;
762 qemu_spin_lock(&env_tlb(env)->c.lock);
763 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
764 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
767 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
768 int k;
769 for (k = 0; k < CPU_VTLB_SIZE; k++) {
770 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
773 qemu_spin_unlock(&env_tlb(env)->c.lock);
776 /* Our TLB does not support large pages, so remember the area covered by
777 large pages and trigger a full TLB flush if these are invalidated. */
778 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
779 target_ulong vaddr, target_ulong size)
781 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
782 target_ulong lp_mask = ~(size - 1);
784 if (lp_addr == (target_ulong)-1) {
785 /* No previous large page. */
786 lp_addr = vaddr;
787 } else {
788 /* Extend the existing region to include the new page.
789 This is a compromise between unnecessary flushes and
790 the cost of maintaining a full variable size TLB. */
791 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
792 while (((lp_addr ^ vaddr) & lp_mask) != 0) {
793 lp_mask <<= 1;
796 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
797 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
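/*
 * Worked example (made-up addresses, 32-bit target): with an existing 2MB
 * large page at 0x40000000 (lp_mask 0xffe00000), adding another 2MB page
 * at 0x40600000 widens the mask twice, to 0xff800000, so the recorded
 * region becomes the 8MB block 0x40000000..0x407fffff. Any flush of a
 * page inside that block then forces a full flush of the mmu_idx in
 * tlb_flush_page_locked() instead of a single-entry flush.
 */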
800 /* Add a new TLB entry. At most one entry for a given virtual address
801 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
802 * supplied size is only used by tlb_flush_page.
804 * Called from TCG-generated code, which is under an RCU read-side
805 * critical section.
807 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
808 hwaddr paddr, MemTxAttrs attrs, int prot,
809 int mmu_idx, target_ulong size)
811 CPUArchState *env = cpu->env_ptr;
812 CPUTLB *tlb = env_tlb(env);
813 CPUTLBDesc *desc = &tlb->d[mmu_idx];
814 MemoryRegionSection *section;
815 unsigned int index;
816 target_ulong address;
817 target_ulong write_address;
818 uintptr_t addend;
819 CPUTLBEntry *te, tn;
820 hwaddr iotlb, xlat, sz, paddr_page;
821 target_ulong vaddr_page;
822 int asidx = cpu_asidx_from_attrs(cpu, attrs);
823 int wp_flags;
824 bool is_ram, is_romd;
826 assert_cpu_is_self(cpu);
828 if (size <= TARGET_PAGE_SIZE) {
829 sz = TARGET_PAGE_SIZE;
830 } else {
831 tlb_add_large_page(env, mmu_idx, vaddr, size);
832 sz = size;
834 vaddr_page = vaddr & TARGET_PAGE_MASK;
835 paddr_page = paddr & TARGET_PAGE_MASK;
837 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
838 &xlat, &sz, attrs, &prot);
839 assert(sz >= TARGET_PAGE_SIZE);
841 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
842 " prot=%x idx=%d\n",
843 vaddr, paddr, prot, mmu_idx);
845 address = vaddr_page;
846 if (size < TARGET_PAGE_SIZE) {
847 /* Repeat the MMU check and TLB fill on every access. */
848 address |= TLB_INVALID_MASK;
850 if (attrs.byte_swap) {
851 address |= TLB_BSWAP;
854 is_ram = memory_region_is_ram(section->mr);
855 is_romd = memory_region_is_romd(section->mr);
857 if (is_ram || is_romd) {
858 /* RAM and ROMD both have associated host memory. */
859 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
860 } else {
861 /* I/O does not; force the host address to NULL. */
862 addend = 0;
865 write_address = address;
866 if (is_ram) {
867 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
869 * Computing is_clean is expensive; avoid all that unless
870 * the page is actually writable.
872 if (prot & PAGE_WRITE) {
873 if (section->readonly) {
874 write_address |= TLB_DISCARD_WRITE;
875 } else if (cpu_physical_memory_is_clean(iotlb)) {
876 write_address |= TLB_NOTDIRTY;
879 } else {
880 /* I/O or ROMD */
881 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
883 * Writes to romd devices must go through MMIO to enable write.
884 * Reads to romd devices go through the ram_ptr found above,
885 * but of course reads to I/O must go through MMIO.
887 write_address |= TLB_MMIO;
888 if (!is_romd) {
889 address = write_address;
893 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
894 TARGET_PAGE_SIZE);
896 index = tlb_index(env, mmu_idx, vaddr_page);
897 te = tlb_entry(env, mmu_idx, vaddr_page);
900 * Hold the TLB lock for the rest of the function. We could acquire/release
901 * the lock several times in the function, but it is faster to amortize the
902 * acquisition cost by acquiring it just once. Note that this leads to
903 * a longer critical section, but this is not a concern since the TLB lock
904 * is unlikely to be contended.
906 qemu_spin_lock(&tlb->c.lock);
908 /* Note that the tlb is no longer clean. */
909 tlb->c.dirty |= 1 << mmu_idx;
911 /* Make sure there's no cached translation for the new page. */
912 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
915 * Only evict the old entry to the victim tlb if it's for a
916 * different page; otherwise just overwrite the stale data.
918 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
919 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
920 CPUTLBEntry *tv = &desc->vtable[vidx];
922 /* Evict the old entry into the victim tlb. */
923 copy_tlb_helper_locked(tv, te);
924 desc->viotlb[vidx] = desc->iotlb[index];
925 tlb_n_used_entries_dec(env, mmu_idx);
928 /* refill the tlb */
930 * At this point iotlb contains a physical section number in the lower
931 * TARGET_PAGE_BITS, and either
932 * + the ram_addr_t of the page base of the target RAM (RAM)
933 * + the offset within section->mr of the page base (I/O, ROMD)
934 * We subtract the vaddr_page (which is page aligned and thus won't
935 * disturb the low bits) to give an offset which can be added to the
936 * (non-page-aligned) vaddr of the eventual memory access to get
937 * the MemoryRegion offset for the access. Note that the vaddr we
938 * subtract here is that of the page base, and not the same as the
939 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
941 desc->iotlb[index].addr = iotlb - vaddr_page;
942 desc->iotlb[index].attrs = attrs;
944 /* Now calculate the new entry */
945 tn.addend = addend - vaddr_page;
946 if (prot & PAGE_READ) {
947 tn.addr_read = address;
948 if (wp_flags & BP_MEM_READ) {
949 tn.addr_read |= TLB_WATCHPOINT;
951 } else {
952 tn.addr_read = -1;
955 if (prot & PAGE_EXEC) {
956 tn.addr_code = address;
957 } else {
958 tn.addr_code = -1;
961 tn.addr_write = -1;
962 if (prot & PAGE_WRITE) {
963 tn.addr_write = write_address;
964 if (prot & PAGE_WRITE_INV) {
965 tn.addr_write |= TLB_INVALID_MASK;
967 if (wp_flags & BP_MEM_WRITE) {
968 tn.addr_write |= TLB_WATCHPOINT;
972 copy_tlb_helper_locked(te, &tn);
973 tlb_n_used_entries_inc(env, mmu_idx);
974 qemu_spin_unlock(&tlb->c.lock);
977 /* Add a new TLB entry, but without specifying the memory
978 * transaction attributes to be used.
980 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
981 hwaddr paddr, int prot,
982 int mmu_idx, target_ulong size)
984 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
985 prot, mmu_idx, size);
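/*
 * A minimal usage sketch, from the point of view of a target's
 * CPUClass::tlb_fill hook. The foo_* names are stand-ins for the target's
 * own page-table walker and fault injection, not functions that exist in
 * QEMU:
 *
 *   static bool foo_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
 *                                MMUAccessType access_type, int mmu_idx,
 *                                bool probe, uintptr_t retaddr)
 *   {
 *       hwaddr phys;
 *       int prot;
 *
 *       if (foo_translate(cs, addr, access_type, mmu_idx, &phys, &prot)) {
 *           tlb_set_page_with_attrs(cs, addr & TARGET_PAGE_MASK, phys,
 *                                   MEMTXATTRS_UNSPECIFIED, prot,
 *                                   mmu_idx, TARGET_PAGE_SIZE);
 *           return true;
 *       }
 *       if (probe) {
 *           return false;
 *       }
 *       foo_raise_mmu_fault(cs, addr, access_type, retaddr);
 *   }
 *
 * On the failing path foo_raise_mmu_fault() is expected to raise the guest
 * exception and longjmp back to the cpu loop, so it does not return;
 * compare the comment in tlb_fill() below.
 */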
988 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
990 ram_addr_t ram_addr;
992 ram_addr = qemu_ram_addr_from_host(ptr);
993 if (ram_addr == RAM_ADDR_INVALID) {
994 error_report("Bad ram pointer %p", ptr);
995 abort();
997 return ram_addr;
1001 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
1002 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
1003 * be discarded and looked up again (e.g. via tlb_entry()).
1005 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
1006 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1008 CPUClass *cc = CPU_GET_CLASS(cpu);
1009 bool ok;
1012 * This is not a probe, so only valid return is success; failure
1013 * should result in exception + longjmp to the cpu loop.
1015 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
1016 assert(ok);
1019 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1020 int mmu_idx, target_ulong addr, uintptr_t retaddr,
1021 MMUAccessType access_type, MemOp op)
1023 CPUState *cpu = env_cpu(env);
1024 hwaddr mr_offset;
1025 MemoryRegionSection *section;
1026 MemoryRegion *mr;
1027 uint64_t val;
1028 bool locked = false;
1029 MemTxResult r;
1031 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1032 mr = section->mr;
1033 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1034 cpu->mem_io_pc = retaddr;
1035 if (!cpu->can_do_io) {
1036 cpu_io_recompile(cpu, retaddr);
1039 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
1040 qemu_mutex_lock_iothread();
1041 locked = true;
1043 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
1044 if (r != MEMTX_OK) {
1045 hwaddr physaddr = mr_offset +
1046 section->offset_within_address_space -
1047 section->offset_within_region;
1049 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
1050 mmu_idx, iotlbentry->attrs, r, retaddr);
1052 if (locked) {
1053 qemu_mutex_unlock_iothread();
1056 return val;
1059 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1060 int mmu_idx, uint64_t val, target_ulong addr,
1061 uintptr_t retaddr, MemOp op)
1063 CPUState *cpu = env_cpu(env);
1064 hwaddr mr_offset;
1065 MemoryRegionSection *section;
1066 MemoryRegion *mr;
1067 bool locked = false;
1068 MemTxResult r;
1070 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1071 mr = section->mr;
1072 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1073 if (!cpu->can_do_io) {
1074 cpu_io_recompile(cpu, retaddr);
1076 cpu->mem_io_pc = retaddr;
1078 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
1079 qemu_mutex_lock_iothread();
1080 locked = true;
1082 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
1083 if (r != MEMTX_OK) {
1084 hwaddr physaddr = mr_offset +
1085 section->offset_within_address_space -
1086 section->offset_within_region;
1088 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
1089 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
1090 retaddr);
1092 if (locked) {
1093 qemu_mutex_unlock_iothread();
1097 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
1099 #if TCG_OVERSIZED_GUEST
1100 return *(target_ulong *)((uintptr_t)entry + ofs);
1101 #else
1102 /* ofs might correspond to .addr_write, so use atomic_read */
1103 return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
1104 #endif
1107 /* Return true if ADDR is present in the victim tlb, and has been copied
1108 back to the main tlb. */
1109 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1110 size_t elt_ofs, target_ulong page)
1112 size_t vidx;
1114 assert_cpu_is_self(env_cpu(env));
1115 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1116 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1117 target_ulong cmp;
1119 /* elt_ofs might correspond to .addr_write, so use atomic_read */
1120 #if TCG_OVERSIZED_GUEST
1121 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1122 #else
1123 cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1124 #endif
1126 if (cmp == page) {
1127 /* Found entry in victim tlb, swap tlb and iotlb. */
1128 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1130 qemu_spin_lock(&env_tlb(env)->c.lock);
1131 copy_tlb_helper_locked(&tmptlb, tlb);
1132 copy_tlb_helper_locked(tlb, vtlb);
1133 copy_tlb_helper_locked(vtlb, &tmptlb);
1134 qemu_spin_unlock(&env_tlb(env)->c.lock);
1136 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1137 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1138 tmpio = *io; *io = *vio; *vio = tmpio;
1139 return true;
1142 return false;
1145 /* Macro to call the above, with local variables from the use context. */
1146 #define VICTIM_TLB_HIT(TY, ADDR) \
1147 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
1148 (ADDR) & TARGET_PAGE_MASK)
1151 * Return a ram_addr_t for the virtual address for execution.
1153 * Return -1 if we can't translate and execute from an entire page
1154 * of RAM. This will force us to execute by loading and translating
1155 * one insn at a time, without caching.
1157 * NOTE: This function will trigger an exception if the page is
1158 * not executable.
1160 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1161 void **hostp)
1163 uintptr_t mmu_idx = cpu_mmu_index(env, true);
1164 uintptr_t index = tlb_index(env, mmu_idx, addr);
1165 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1166 void *p;
1168 if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1169 if (!VICTIM_TLB_HIT(addr_code, addr)) {
1170 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1171 index = tlb_index(env, mmu_idx, addr);
1172 entry = tlb_entry(env, mmu_idx, addr);
1174 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1176 * The MMU protection covers a smaller range than a target
1177 * page, so we must redo the MMU check for every insn.
1179 return -1;
1182 assert(tlb_hit(entry->addr_code, addr));
1185 if (unlikely(entry->addr_code & TLB_MMIO)) {
1186 /* The region is not backed by RAM. */
1187 if (hostp) {
1188 *hostp = NULL;
1190 return -1;
1193 p = (void *)((uintptr_t)addr + entry->addend);
1194 if (hostp) {
1195 *hostp = p;
1197 return qemu_ram_addr_from_host_nofail(p);
1200 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1202 return get_page_addr_code_hostp(env, addr, NULL);
1205 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1206 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1208 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1210 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1212 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1213 struct page_collection *pages
1214 = page_collection_lock(ram_addr, ram_addr + size);
1215 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1216 page_collection_unlock(pages);
1220 * Set both VGA and migration bits for simplicity and to remove
1221 * the notdirty callback faster.
1223 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1225 /* We remove the notdirty callback only if the code has been flushed. */
1226 if (!cpu_physical_memory_is_clean(ram_addr)) {
1227 trace_memory_notdirty_set_dirty(mem_vaddr);
1228 tlb_set_dirty(cpu, mem_vaddr);
1233 * Probe for whether the specified guest access is permitted. If it is not
1234 * permitted then an exception will be taken in the same way as if this
1235 * were a real access (and we will not return).
1236 * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
1237 * returns the address of the host page similar to tlb_vaddr_to_host().
1239 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1240 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1242 uintptr_t index = tlb_index(env, mmu_idx, addr);
1243 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1244 target_ulong tlb_addr;
1245 size_t elt_ofs;
1246 int wp_access;
1248 g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1250 switch (access_type) {
1251 case MMU_DATA_LOAD:
1252 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1253 wp_access = BP_MEM_READ;
1254 break;
1255 case MMU_DATA_STORE:
1256 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1257 wp_access = BP_MEM_WRITE;
1258 break;
1259 case MMU_INST_FETCH:
1260 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1261 wp_access = BP_MEM_READ;
1262 break;
1263 default:
1264 g_assert_not_reached();
1266 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1268 if (unlikely(!tlb_hit(tlb_addr, addr))) {
1269 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
1270 addr & TARGET_PAGE_MASK)) {
1271 tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
1272 /* TLB resize via tlb_fill may have moved the entry. */
1273 index = tlb_index(env, mmu_idx, addr);
1274 entry = tlb_entry(env, mmu_idx, addr);
1276 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1279 if (!size) {
1280 return NULL;
1283 if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
1284 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1286 /* Reject I/O access, or other required slow-path. */
1287 if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
1288 return NULL;
1291 /* Handle watchpoints. */
1292 if (tlb_addr & TLB_WATCHPOINT) {
1293 cpu_check_watchpoint(env_cpu(env), addr, size,
1294 iotlbentry->attrs, wp_access, retaddr);
1297 /* Handle clean RAM pages. */
1298 if (tlb_addr & TLB_NOTDIRTY) {
1299 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1303 return (void *)((uintptr_t)addr + entry->addend);
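/*
 * A usage sketch (hypothetical helper, not part of this file): a target
 * helper that must either complete a 16-byte, 16-byte-aligned store or
 * fault before writing anything can validate the whole access up front:
 *
 *   void *host = probe_access(env, addr, 16, MMU_DATA_STORE,
 *                             cpu_mmu_index(env, false), GETPC());
 *   if (host) {
 *       memcpy(host, buf, 16);    // plain host RAM, fast path
 *   } else {
 *       // size == 0 or the page needs the I/O slow path
 *   }
 *
 * By the time probe_access() returns, any fault, watchpoint or clean-page
 * handling has already been done, so a NULL result only means the data
 * cannot be accessed through a host pointer directly.
 */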
1306 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1307 MMUAccessType access_type, int mmu_idx)
1309 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1310 target_ulong tlb_addr, page;
1311 size_t elt_ofs;
1313 switch (access_type) {
1314 case MMU_DATA_LOAD:
1315 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1316 break;
1317 case MMU_DATA_STORE:
1318 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1319 break;
1320 case MMU_INST_FETCH:
1321 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1322 break;
1323 default:
1324 g_assert_not_reached();
1327 page = addr & TARGET_PAGE_MASK;
1328 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1330 if (!tlb_hit_page(tlb_addr, page)) {
1331 uintptr_t index = tlb_index(env, mmu_idx, addr);
1333 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
1334 CPUState *cs = env_cpu(env);
1335 CPUClass *cc = CPU_GET_CLASS(cs);
1337 if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
1338 /* Non-faulting page table read failed. */
1339 return NULL;
1342 /* TLB resize via tlb_fill may have moved the entry. */
1343 entry = tlb_entry(env, mmu_idx, addr);
1345 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1348 if (tlb_addr & ~TARGET_PAGE_MASK) {
1349 /* IO access */
1350 return NULL;
1353 return (void *)((uintptr_t)addr + entry->addend);
1357 #ifdef CONFIG_PLUGIN
1359 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1360 * This should be a hot path as we will have just looked this path up
1361 * in the softmmu lookup code (or helper). We don't handle re-fills or
1362 * checking the victim table. This is purely informational.
1364 * This should never fail as the memory access being instrumented
1365 * should have just filled the TLB.
1368 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
1369 bool is_store, struct qemu_plugin_hwaddr *data)
1371 CPUArchState *env = cpu->env_ptr;
1372 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1373 uintptr_t index = tlb_index(env, mmu_idx, addr);
1374 target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
1376 if (likely(tlb_hit(tlb_addr, addr))) {
1377 /* We must have an iotlb entry for MMIO */
1378 if (tlb_addr & TLB_MMIO) {
1379 CPUIOTLBEntry *iotlbentry;
1380 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1381 data->is_io = true;
1382 data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1383 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1384 } else {
1385 data->is_io = false;
1386 data->v.ram.hostaddr = addr + tlbe->addend;
1388 return true;
1390 return false;
1393 #endif
1395 /* Probe for a read-modify-write atomic operation. Do not allow unaligned
1396 * operations, or io operations to proceed. Return the host address. */
1397 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1398 TCGMemOpIdx oi, uintptr_t retaddr)
1400 size_t mmu_idx = get_mmuidx(oi);
1401 uintptr_t index = tlb_index(env, mmu_idx, addr);
1402 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1403 target_ulong tlb_addr = tlb_addr_write(tlbe);
1404 MemOp mop = get_memop(oi);
1405 int a_bits = get_alignment_bits(mop);
1406 int s_bits = mop & MO_SIZE;
1407 void *hostaddr;
1409 /* Adjust the given return address. */
1410 retaddr -= GETPC_ADJ;
1412 /* Enforce guest required alignment. */
1413 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1414 /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1415 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1416 mmu_idx, retaddr);
1419 /* Enforce qemu required alignment. */
1420 if (unlikely(addr & ((1 << s_bits) - 1))) {
1421 /* We get here if guest alignment was not requested,
1422 or was not enforced by cpu_unaligned_access above.
1423 We might widen the access and emulate, but for now
1424 mark an exception and exit the cpu loop. */
1425 goto stop_the_world;
1428 /* Check TLB entry and enforce page permissions. */
1429 if (!tlb_hit(tlb_addr, addr)) {
1430 if (!VICTIM_TLB_HIT(addr_write, addr)) {
1431 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1432 mmu_idx, retaddr);
1433 index = tlb_index(env, mmu_idx, addr);
1434 tlbe = tlb_entry(env, mmu_idx, addr);
1436 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1439 /* Notice an IO access or a needs-MMU-lookup access */
1440 if (unlikely(tlb_addr & TLB_MMIO)) {
1441 /* There's really nothing that can be done to
1442 support this apart from stop-the-world. */
1443 goto stop_the_world;
1446 /* Let the guest notice RMW on a write-only page. */
1447 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1448 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1449 mmu_idx, retaddr);
1450 /* Since we don't support reads and writes to different addresses,
1451 and we do have the proper page loaded for write, this shouldn't
1452 ever return. But just in case, handle via stop-the-world. */
1453 goto stop_the_world;
1456 hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1458 if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1459 notdirty_write(env_cpu(env), addr, 1 << s_bits,
1460 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1463 return hostaddr;
1465 stop_the_world:
1466 cpu_loop_exit_atomic(env_cpu(env), retaddr);
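/*
 * Concrete example of the two checks above (illustrative address): for a
 * 4-byte atomic op (s_bits == 2) at addr 0x1002, a guest that requested
 * natural alignment (a_bits >= 2) gets cpu_unaligned_access(), i.e. a
 * guest-visible alignment fault. A guest that did not request alignment
 * falls through to the second check and takes the stop_the_world path:
 * cpu_loop_exit_atomic() re-executes the instruction under exclusive
 * execution rather than attempting a misaligned host atomic.
 */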
1470 * Load Helpers
1472 * We support two different access types. SOFTMMU_CODE_ACCESS is
1473 * specifically for reading instructions from system memory. It is
1474 * called by the translation loop and in some helpers where the code
1475 * is disassembled. It shouldn't be called directly by guest code.
1478 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
1479 TCGMemOpIdx oi, uintptr_t retaddr);
1481 static inline uint64_t QEMU_ALWAYS_INLINE
1482 load_memop(const void *haddr, MemOp op)
1484 switch (op) {
1485 case MO_UB:
1486 return ldub_p(haddr);
1487 case MO_BEUW:
1488 return lduw_be_p(haddr);
1489 case MO_LEUW:
1490 return lduw_le_p(haddr);
1491 case MO_BEUL:
1492 return (uint32_t)ldl_be_p(haddr);
1493 case MO_LEUL:
1494 return (uint32_t)ldl_le_p(haddr);
1495 case MO_BEQ:
1496 return ldq_be_p(haddr);
1497 case MO_LEQ:
1498 return ldq_le_p(haddr);
1499 default:
1500 qemu_build_not_reached();
1504 static inline uint64_t QEMU_ALWAYS_INLINE
1505 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1506 uintptr_t retaddr, MemOp op, bool code_read,
1507 FullLoadHelper *full_load)
1509 uintptr_t mmu_idx = get_mmuidx(oi);
1510 uintptr_t index = tlb_index(env, mmu_idx, addr);
1511 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1512 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1513 const size_t tlb_off = code_read ?
1514 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1515 const MMUAccessType access_type =
1516 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1517 unsigned a_bits = get_alignment_bits(get_memop(oi));
1518 void *haddr;
1519 uint64_t res;
1520 size_t size = memop_size(op);
1522 /* Handle CPU specific unaligned behaviour */
1523 if (addr & ((1 << a_bits) - 1)) {
1524 cpu_unaligned_access(env_cpu(env), addr, access_type,
1525 mmu_idx, retaddr);
1528 /* If the TLB entry is for a different page, reload and try again. */
1529 if (!tlb_hit(tlb_addr, addr)) {
1530 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1531 addr & TARGET_PAGE_MASK)) {
1532 tlb_fill(env_cpu(env), addr, size,
1533 access_type, mmu_idx, retaddr);
1534 index = tlb_index(env, mmu_idx, addr);
1535 entry = tlb_entry(env, mmu_idx, addr);
1537 tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1538 tlb_addr &= ~TLB_INVALID_MASK;
1541 /* Handle anything that isn't just a straight memory access. */
1542 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1543 CPUIOTLBEntry *iotlbentry;
1544 bool need_swap;
1546 /* For anything that is unaligned, recurse through full_load. */
1547 if ((addr & (size - 1)) != 0) {
1548 goto do_unaligned_access;
1551 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1553 /* Handle watchpoints. */
1554 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1555 /* On watchpoint hit, this will longjmp out. */
1556 cpu_check_watchpoint(env_cpu(env), addr, size,
1557 iotlbentry->attrs, BP_MEM_READ, retaddr);
1560 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1562 /* Handle I/O access. */
1563 if (likely(tlb_addr & TLB_MMIO)) {
1564 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1565 access_type, op ^ (need_swap * MO_BSWAP));
1568 haddr = (void *)((uintptr_t)addr + entry->addend);
1571 * Keep these two load_memop separate to ensure that the compiler
1572 * is able to fold the entire function to a single instruction.
1573 * There is a build-time assert inside to remind you of this. ;-)
1575 if (unlikely(need_swap)) {
1576 return load_memop(haddr, op ^ MO_BSWAP);
1578 return load_memop(haddr, op);
1581 /* Handle slow unaligned access (it spans two pages or IO). */
1582 if (size > 1
1583 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1584 >= TARGET_PAGE_SIZE)) {
1585 target_ulong addr1, addr2;
1586 uint64_t r1, r2;
1587 unsigned shift;
1588 do_unaligned_access:
1589 addr1 = addr & ~((target_ulong)size - 1);
1590 addr2 = addr1 + size;
1591 r1 = full_load(env, addr1, oi, retaddr);
1592 r2 = full_load(env, addr2, oi, retaddr);
1593 shift = (addr & (size - 1)) * 8;
1595 if (memop_big_endian(op)) {
1596 /* Big-endian combine. */
1597 res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1598 } else {
1599 /* Little-endian combine. */
1600 res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1602 return res & MAKE_64BIT_MASK(0, size * 8);
1605 haddr = (void *)((uintptr_t)addr + entry->addend);
1606 return load_memop(haddr, op);
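/*
 * Example of the unaligned combine above (little-endian case, invented
 * addresses): a 4-byte load at an address with (addr & 3) == 2 that also
 * crosses a page does two aligned 4-byte loads, r1 from addr & ~3 and r2
 * from the following word; then shift == 16 and
 * res = (r1 >> 16) | (r2 << 16), masked to 32 bits. That is, the two low
 * bytes of the result come from the top of r1 and the two high bytes from
 * the bottom of r2.
 */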
1610 * For the benefit of TCG generated code, we want to avoid the
1611 * complication of ABI-specific return type promotion and always
1612 * return a value extended to the register size of the host. This is
1613 * tcg_target_long, except in the case of a 32-bit host and 64-bit
1614 * data, and for that we always have uint64_t.
1616 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1619 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1620 TCGMemOpIdx oi, uintptr_t retaddr)
1622 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1625 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1626 TCGMemOpIdx oi, uintptr_t retaddr)
1628 return full_ldub_mmu(env, addr, oi, retaddr);
1631 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1632 TCGMemOpIdx oi, uintptr_t retaddr)
1634 return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1635 full_le_lduw_mmu);
1638 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1639 TCGMemOpIdx oi, uintptr_t retaddr)
1641 return full_le_lduw_mmu(env, addr, oi, retaddr);
1644 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1645 TCGMemOpIdx oi, uintptr_t retaddr)
1647 return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1648 full_be_lduw_mmu);
1651 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1652 TCGMemOpIdx oi, uintptr_t retaddr)
1654 return full_be_lduw_mmu(env, addr, oi, retaddr);
1657 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1658 TCGMemOpIdx oi, uintptr_t retaddr)
1660 return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1661 full_le_ldul_mmu);
1664 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1665 TCGMemOpIdx oi, uintptr_t retaddr)
1667 return full_le_ldul_mmu(env, addr, oi, retaddr);
1670 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1671 TCGMemOpIdx oi, uintptr_t retaddr)
1673 return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1674 full_be_ldul_mmu);
1677 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1678 TCGMemOpIdx oi, uintptr_t retaddr)
1680 return full_be_ldul_mmu(env, addr, oi, retaddr);
1683 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1684 TCGMemOpIdx oi, uintptr_t retaddr)
1686 return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1687 helper_le_ldq_mmu);
1690 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1691 TCGMemOpIdx oi, uintptr_t retaddr)
1693 return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
1694 helper_be_ldq_mmu);
1698 * Provide signed versions of the load routines as well. We can of course
1699 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
1703 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
1704 TCGMemOpIdx oi, uintptr_t retaddr)
1706 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
1709 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
1710 TCGMemOpIdx oi, uintptr_t retaddr)
1712 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
1715 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
1716 TCGMemOpIdx oi, uintptr_t retaddr)
1718 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
1721 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
1722 TCGMemOpIdx oi, uintptr_t retaddr)
1724 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
1727 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
1728 TCGMemOpIdx oi, uintptr_t retaddr)
1730 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
1734 * Load helpers for cpu_ldst.h.
1737 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
1738 int mmu_idx, uintptr_t retaddr,
1739 MemOp op, FullLoadHelper *full_load)
1741 uint16_t meminfo;
1742 TCGMemOpIdx oi;
1743 uint64_t ret;
1745 meminfo = trace_mem_get_info(op, mmu_idx, false);
1746 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
1748 op &= ~MO_SIGN;
1749 oi = make_memop_idx(op, mmu_idx);
1750 ret = full_load(env, addr, oi, retaddr);
1752 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
1754 return ret;
1757 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1758 int mmu_idx, uintptr_t ra)
1760 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
1763 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1764 int mmu_idx, uintptr_t ra)
1766 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
1767 full_ldub_mmu);
1770 uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1771 int mmu_idx, uintptr_t ra)
1773 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW,
1774 MO_TE == MO_LE
1775 ? full_le_lduw_mmu : full_be_lduw_mmu);
1778 int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1779 int mmu_idx, uintptr_t ra)
1781 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW,
1782 MO_TE == MO_LE
1783 ? full_le_lduw_mmu : full_be_lduw_mmu);
1786 uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1787 int mmu_idx, uintptr_t ra)
1789 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL,
1790 MO_TE == MO_LE
1791 ? full_le_ldul_mmu : full_be_ldul_mmu);
1794 uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1795 int mmu_idx, uintptr_t ra)
1797 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ,
1798 MO_TE == MO_LE
1799 ? helper_le_ldq_mmu : helper_be_ldq_mmu);
1802 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
1803 uintptr_t retaddr)
1805 return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1808 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1810 return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1813 uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr,
1814 uintptr_t retaddr)
1816 return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1819 int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1821 return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1824 uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1826 return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1829 uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1831 return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1834 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
1836 return cpu_ldub_data_ra(env, ptr, 0);
1839 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
1841 return cpu_ldsb_data_ra(env, ptr, 0);
1844 uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr)
1846 return cpu_lduw_data_ra(env, ptr, 0);
1849 int cpu_ldsw_data(CPUArchState *env, target_ulong ptr)
1851 return cpu_ldsw_data_ra(env, ptr, 0);
1854 uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr)
1856 return cpu_ldl_data_ra(env, ptr, 0);
1859 uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr)
1861 return cpu_ldq_data_ra(env, ptr, 0);
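/*
 * Putting the layers above together, a guest-endian 32-bit load from a
 * target helper resolves as:
 *
 *   cpu_ldl_data(env, ptr)
 *     -> cpu_ldl_data_ra(env, ptr, 0)
 *       -> cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), 0)
 *         -> cpu_load_helper(..., MO_TEUL,
 *                            full_le_ldul_mmu or full_be_ldul_mmu)
 *           -> load_helper(..., MO_LEUL or MO_BEUL, false, ...)
 *
 * with the trace and plugin hooks emitted in cpu_load_helper(), and the
 * actual TLB lookup, I/O dispatch and unaligned handling done in
 * load_helper().
 */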
1865 * Store Helpers
static inline void QEMU_ALWAYS_INLINE
store_memop(void *haddr, uint64_t val, MemOp op)
{
    switch (op) {
    case MO_UB:
        stb_p(haddr, val);
        break;
    case MO_BEUW:
        stw_be_p(haddr, val);
        break;
    case MO_LEUW:
        stw_le_p(haddr, val);
        break;
    case MO_BEUL:
        stl_be_p(haddr, val);
        break;
    case MO_LEUL:
        stl_le_p(haddr, val);
        break;
    case MO_BEQ:
        stq_be_p(haddr, val);
        break;
    case MO_LEQ:
        stq_le_p(haddr, val);
        break;
    default:
        qemu_build_not_reached();
    }
}
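/*
 * store_helper below is structured as a fast path that stores directly to
 * the host address on a TLB hit, with the flag bits kept in tlb_addr
 * (TLB_WATCHPOINT, TLB_BSWAP, TLB_MMIO, TLB_DISCARD_WRITE, TLB_NOTDIRTY)
 * diverting to the slow cases, and a byte-wise fallback for accesses that
 * span a page boundary.
 */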
static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again. */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }
    /* Handle anything that isn't just a straight memory access. */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores. */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints. */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out. */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access. */
        if (tlb_addr & TLB_MMIO) {
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM. */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages. */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this. ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }
    /* Handle slow unaligned access (it spans two pages or IO). */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        int i;
        uintptr_t index2;
        CPUTLBEntry *entry2;
        target_ulong page2, tlb_addr2;
        size_t size2;

    do_unaligned_access:
        /*
         * Ensure the second page is in the TLB.  Note that the first page
         * is already guaranteed to be filled, and that the second page
         * cannot evict the first.
         */
        page2 = (addr + size) & TARGET_PAGE_MASK;
        size2 = (addr + size) & ~TARGET_PAGE_MASK;
        index2 = tlb_index(env, mmu_idx, page2);
        entry2 = tlb_entry(env, mmu_idx, page2);
        tlb_addr2 = tlb_addr_write(entry2);
        if (!tlb_hit_page(tlb_addr2, page2)) {
            if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
                tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                         mmu_idx, retaddr);
                index2 = tlb_index(env, mmu_idx, page2);
                entry2 = tlb_entry(env, mmu_idx, page2);
            }
            tlb_addr2 = tlb_addr_write(entry2);
        }

        /*
         * Handle watchpoints.  Since this may trap, all checks
         * must happen before any store.
         */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                                 BP_MEM_WRITE, retaddr);
        }
        if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), page2, size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                                 BP_MEM_WRITE, retaddr);
        }

        /*
         * XXX: not efficient, but simple.
         * This loop must go in the forward direction to avoid issues
         * with self-modifying code in Windows 64-bit.
         */
        for (i = 0; i < size; ++i) {
            uint8_t val8;
            if (memop_big_endian(op)) {
                /* Big-endian extract. */
                val8 = val >> (((size - 1) * 8) - (i * 8));
            } else {
                /* Little-endian extract. */
                val8 = val >> (i * 8);
            }
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
        return;
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}
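/*
 * The out-of-line store entry points called from TCG-generated code.
 * Each one simply binds store_helper to a fixed MemOp; QEMU_ALWAYS_INLINE
 * on store_helper lets the compiler specialise the body for each operand
 * size and endianness.
 */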
void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_UB);
}

void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
}

void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
}

void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
}

void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
}

void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
}

void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
}
/*
 * Store Helpers for cpu_ldst.h
 */
static inline void QEMU_ALWAYS_INLINE
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
                 int mmu_idx, uintptr_t retaddr, MemOp op)
{
    TCGMemOpIdx oi;
    uint16_t meminfo;

    meminfo = trace_mem_get_info(op, mmu_idx, true);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    oi = make_memop_idx(op, mmu_idx);
    store_helper(env, addr, val, oi, retaddr, op);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
}
void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
}

void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW);
}

void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL);
}

void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ);
}
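/*
 * As on the load side, the _data_ra and _data store variants below only
 * select the default data MMU index and the unwind return address before
 * deferring to the _mmuidx_ra functions above.
 */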
void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr,
                     uint64_t val, uintptr_t retaddr)
{
    cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}
void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stb_data_ra(env, ptr, val, 0);
}

void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_data_ra(env, ptr, val, 0);
}

void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_data_ra(env, ptr, val, 0);
}

void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_data_ra(env, ptr, val, 0);
}
/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers. */

#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX get_mmuidx(oi)

#include "atomic_common.inc.c"
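/*
 * Each inclusion of "atomic_template.h" below instantiates the atomic
 * helpers for one access size: DATA_SIZE must be defined immediately
 * before the include and is consumed by the template.  The ATOMIC_*
 * macros above control the generated helper names and how the guest
 * address is translated to a host pointer via atomic_mmu_lookup.
 */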
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
/* Second set of helpers are directly callable from TCG as helpers. */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#undef ATOMIC_MMU_IDX
/* Code access functions. */
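/*
 * These fetch guest instructions rather than data: the MMU index comes
 * from cpu_mmu_index(env, true), and load_helper is invoked with its
 * code_read argument set to true so the lookup goes through the code
 * side of the TLB entry.  A zero retaddr is passed, as there is no
 * generated-code return address to unwind.
 */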
static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
}

uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
    return full_ldub_code(env, addr, oi, 0);
}

static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
}

uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
    return full_lduw_code(env, addr, oi, 0);
}

static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
}

uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
    return full_ldl_code(env, addr, oi, 0);
}

static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
}

uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
    return full_ldq_code(env, addr, oi, 0);
}