exec.c

   1 /*
   2  *  Virtual page mapping
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20 #include "qapi/error.h"
  21 #ifndef _WIN32
  22 #endif
  23
  24 #include "qemu/cutils.h"
  25 #include "cpu.h"
  26 #include "exec/exec-all.h"
  27 #include "tcg.h"
  28 #include "hw/qdev-core.h"
  29 #if !defined(CONFIG_USER_ONLY)
  30 #include "hw/boards.h"
  31 #include "hw/xen/xen.h"
  32 #endif
  33 #include "sysemu/kvm.h"
  34 #include "sysemu/sysemu.h"
  35 #include "qemu/timer.h"
  36 #include "qemu/config-file.h"
  37 #include "qemu/error-report.h"
  38 #if defined(CONFIG_USER_ONLY)
  39 #include "qemu.h"
  40 #else /* !CONFIG_USER_ONLY */
  41 #include "hw/hw.h"
  42 #include "exec/memory.h"
  43 #include "exec/ioport.h"
  44 #include "sysemu/dma.h"
  45 #include "exec/address-spaces.h"
  46 #include "sysemu/xen-mapcache.h"
  47 #include "trace.h"
  48 #endif
  49 #include "exec/cpu-all.h"
  50 #include "qemu/rcu_queue.h"
  51 #include "qemu/main-loop.h"
  52 #include "translate-all.h"
  53 #include "sysemu/replay.h"
  54
  55 #include "exec/memory-internal.h"
  56 #include "exec/ram_addr.h"
  57 #include "exec/log.h"
  58
  59 #include "migration/vmstate.h"
  60
  61 #include "qemu/range.h"
  62 #ifndef _WIN32
  63 #include "qemu/mmap-alloc.h"
  64 #endif
  65
  66 //#define DEBUG_SUBPAGE
  67
#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

/* system_memory is the region every new CPU gets as its default "memory"
 * link (see cpu_exec_initfn).  Both pointers are assigned outside this
 * part of the file; system_io is presumably the I/O-port counterpart.
 */
static MemoryRegion *system_memory;
static MemoryRegion *system_io;

/* Global address spaces.  NOTE(review): presumably rooted at system_io and
 * system_memory respectively -- confirm against memory_map_init().
 */
AddressSpace address_space_io;
AddressSpace address_space_memory;

/* Special-purpose regions.  memory_region_is_unassigned() reports
 * io_mem_rom and io_mem_notdirty as assigned; io_mem_unassigned is what
 * address_space_translate() returns when an IOMMU denies the access.
 */
MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

#endif
  95
#ifdef TARGET_PAGE_BITS_VARY
/* Page size (log2) chosen at run time; 0 means "not chosen yet".
 * See set_preferred_target_page_bits() and finalize_target_page_bits().
 */
int target_page_bits;
/* Set once the page size has been committed and may no longer shrink. */
bool target_page_bits_decided;
#endif

/* List of all CPUs; walked with CPU_FOREACH(). */
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;
 109
 110 bool set_preferred_target_page_bits(int bits)
 111 {
 112     /* The target page size is the lowest common denominator for all
 113      * the CPUs in the system, so we can only make it smaller, never
 114      * larger. And we can't make it smaller once we've committed to
 115      * a particular size.
 116      */
 117 #ifdef TARGET_PAGE_BITS_VARY
 118     assert(bits >= TARGET_PAGE_BITS_MIN);
 119     if (target_page_bits == 0 || target_page_bits > bits) {
 120         if (target_page_bits_decided) {
 121             return false;
 122         }
 123         target_page_bits = bits;
 124     }
 125 #endif
 126     return true;
 127 }
 128
 129 #if !defined(CONFIG_USER_ONLY)
 130
 131 static void finalize_target_page_bits(void)
 132 {
 133 #ifdef TARGET_PAGE_BITS_VARY
 134     if (target_page_bits == 0) {
 135         target_page_bits = TARGET_PAGE_BITS_MIN;
 136     }
 137     target_page_bits_decided = true;
 138 #endif
 139 }
 140
typedef struct PhysPageEntry PhysPageEntry;

/* One 32-bit entry of the multi-level physical page map. */
struct PhysPageEntry {
    /* Number of map levels consumed when following ptr to the next node
     * (each level accounts for P_L2_BITS bits of the page index).
     * 0 for a leaf.
     */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
 149
/* Value of PhysPageEntry.ptr meaning "no node allocated yet":
 * all 26 bits of the field set.
 */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

/* Each node is indexed by P_L2_BITS bits of the page index. */
#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

/* Number of levels needed to cover all page-index bits
 * (ADDR_SPACE_BITS - TARGET_PAGE_BITS), rounded up.
 */
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

/* A node is a table of P_L2_SIZE entries. */
typedef PhysPageEntry Node[P_L2_SIZE];
 161
/* Backing storage for a physical page map: a growable array of nodes and
 * the array of sections that leaf entries index into.
 */
typedef struct PhysPageMap {
    struct rcu_head rcu;

    /* NOTE(review): sections_nb/sections_nb_alloc presumably mirror the
     * nodes_nb/nodes_nb_alloc pair below; their users are outside this
     * part of the file.
     */
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    /* Nodes in use / nodes allocated; see phys_map_node_reserve() and
     * phys_map_node_alloc().
     */
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;
 172
/* Lookup structure that maps addresses of one AddressSpace to
 * MemoryRegionSections.
 */
struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* Most recently looked-up section; accessed with atomic_read() and
     * atomic_set() in address_space_lookup_region().
     */
    MemoryRegionSection *mru_section;
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    /* Node and section storage that phys_map points into. */
    PhysPageMap map;
    AddressSpace *as;
};
 184
/* Offset of addr within its target page. */
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* A page whose bytes may belong to different sections.
 * address_space_lookup_region() recovers it from its iomem member and
 * uses sub_section[SUBPAGE_IDX(addr)] as an index into the dispatch's
 * section array.
 */
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    /* Section index for each byte offset within the page. */
    uint16_t sub_section[];
} subpage_t;
 192
/* Fixed indices into a dispatch's section array.  Index 0 is what
 * lookups return for unmapped addresses (see phys_page_find()).
 */
#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

/* Forward declarations; the definitions are not in this part of the file. */
static void io_mem_init(void);
static void memory_map_init(void);
/* Installed as the commit hook of each CPU's TCG memory listener. */
static void tcg_commit(MemoryListener *listener);

/* Treated as an assigned region by memory_region_is_unassigned(). */
static MemoryRegion io_mem_watch;
 203
/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 *
 * cpu_address_space_init() allocates an array of cpu->num_ases of these
 * and fills in the entry for the address-space index it is given.
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    /* Registered (with tcg_commit as commit hook) only when TCG is enabled. */
    MemoryListener tcg_as_listener;
};
 217
 218 #endif
 219
 220 #if !defined(CONFIG_USER_ONLY)
 221
 222 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 223 {
 224     static unsigned alloc_hint = 16;
 225     if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 226         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
 227         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 228         map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 229         alloc_hint = map->nodes_nb_alloc;
 230     }
 231 }
 232
 233 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 234 {
 235     unsigned i;
 236     uint32_t ret;
 237     PhysPageEntry e;
 238     PhysPageEntry *p;
 239
 240     ret = map->nodes_nb++;
 241     p = map->nodes[ret];
 242     assert(ret != PHYS_MAP_NODE_NIL);
 243     assert(ret != map->nodes_nb_alloc);
 244
 245     e.skip = leaf ? 0 : 1;
 246     e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 247     for (i = 0; i < P_L2_SIZE; ++i) {
 248         memcpy(&p[i], &e, sizeof(e));
 249     }
 250     return ret;
 251 }
 252
 253 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 254                                 hwaddr *index, hwaddr *nb, uint16_t leaf,
 255                                 int level)
 256 {
 257     PhysPageEntry *p;
 258     hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 259
 260     if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 261         lp->ptr = phys_map_node_alloc(map, level == 0);
 262     }
 263     p = map->nodes[lp->ptr];
 264     lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 265
 266     while (*nb && lp < &p[P_L2_SIZE]) {
 267         if ((*index & (step - 1)) == 0 && *nb >= step) {
 268             lp->skip = 0;
 269             lp->ptr = leaf;
 270             *index += step;
 271             *nb -= step;
 272         } else {
 273             phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 274         }
 275         ++lp;
 276     }
 277 }
 278
 279 static void phys_page_set(AddressSpaceDispatch *d,
 280                           hwaddr index, hwaddr nb,
 281                           uint16_t leaf)
 282 {
 283     /* Wildly overreserve - it doesn't matter much. */
 284     phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 285
 286     phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 287 }
 288
 289 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
 290  * and update our entry so we can skip it and go directly to the destination.
 291  */
 292 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
 293 {
 294     unsigned valid_ptr = P_L2_SIZE;
 295     int valid = 0;
 296     PhysPageEntry *p;
 297     int i;
 298
 299     if (lp->ptr == PHYS_MAP_NODE_NIL) {
 300         return;
 301     }
 302
 303     p = nodes[lp->ptr];
 304     for (i = 0; i < P_L2_SIZE; i++) {
 305         if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 306             continue;
 307         }
 308
 309         valid_ptr = i;
 310         valid++;
 311         if (p[i].skip) {
 312             phys_page_compact(&p[i], nodes);
 313         }
 314     }
 315
 316     /* We can only compress if there's only one child. */
 317     if (valid != 1) {
 318         return;
 319     }
 320
 321     assert(valid_ptr < P_L2_SIZE);
 322
 323     /* Don't compress if it won't fit in the # of bits we have. */
 324     if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 325         return;
 326     }
 327
 328     lp->ptr = p[valid_ptr].ptr;
 329     if (!p[valid_ptr].skip) {
 330         /* If our only child is a leaf, make this a leaf. */
 331         /* By design, we should have made this node a leaf to begin with so we
 332          * should never reach here.
 333          * But since it's so simple to handle this, let's do it just in case we
 334          * change this rule.
 335          */
 336         lp->skip = 0;
 337     } else {
 338         lp->skip += p[valid_ptr].skip;
 339     }
 340 }
 341
 342 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 343 {
 344     if (d->phys_map.skip) {
 345         phys_page_compact(&d->phys_map, d->map.nodes);
 346     }
 347 }
 348
 349 static inline bool section_covers_addr(const MemoryRegionSection *section,
 350                                        hwaddr addr)
 351 {
 352     /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
 353      * the section must cover the entire address space.
 354      */
 355     return int128_gethi(section->size) ||
 356            range_covers_byte(section->offset_within_address_space,
 357                              int128_getlo(section->size), addr);
 358 }
 359
 360 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 361                                            Node *nodes, MemoryRegionSection *sections)
 362 {
 363     PhysPageEntry *p;
 364     hwaddr index = addr >> TARGET_PAGE_BITS;
 365     int i;
 366
 367     for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 368         if (lp.ptr == PHYS_MAP_NODE_NIL) {
 369             return &sections[PHYS_SECTION_UNASSIGNED];
 370         }
 371         p = nodes[lp.ptr];
 372         lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 373     }
 374
 375     if (section_covers_addr(&sections[lp.ptr], addr)) {
 376         return &sections[lp.ptr];
 377     } else {
 378         return &sections[PHYS_SECTION_UNASSIGNED];
 379     }
 380 }
 381
 382 bool memory_region_is_unassigned(MemoryRegion *mr)
 383 {
 384     return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 385         && mr != &io_mem_watch;
 386 }
 387
 388 /* Called from RCU critical section */
 389 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 390                                                         hwaddr addr,
 391                                                         bool resolve_subpage)
 392 {
 393     MemoryRegionSection *section = atomic_read(&d->mru_section);
 394     subpage_t *subpage;
 395     bool update;
 396
 397     if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
 398         section_covers_addr(section, addr)) {
 399         update = false;
 400     } else {
 401         section = phys_page_find(d->phys_map, addr, d->map.nodes,
 402                                  d->map.sections);
 403         update = true;
 404     }
 405     if (resolve_subpage && section->mr->subpage) {
 406         subpage = container_of(section->mr, subpage_t, iomem);
 407         section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 408     }
 409     if (update) {
 410         atomic_set(&d->mru_section, section);
 411     }
 412     return section;
 413 }
 414
 415 /* Called from RCU critical section */
 416 static MemoryRegionSection *
 417 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 418                                  hwaddr *plen, bool resolve_subpage)
 419 {
 420     MemoryRegionSection *section;
 421     MemoryRegion *mr;
 422     Int128 diff;
 423
 424     section = address_space_lookup_region(d, addr, resolve_subpage);
 425     /* Compute offset within MemoryRegionSection */
 426     addr -= section->offset_within_address_space;
 427
 428     /* Compute offset within MemoryRegion */
 429     *xlat = addr + section->offset_within_region;
 430
 431     mr = section->mr;
 432
 433     /* MMIO registers can be expected to perform full-width accesses based only
 434      * on their address, without considering adjacent registers that could
 435      * decode to completely different MemoryRegions.  When such registers
 436      * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 437      * regions overlap wildly.  For this reason we cannot clamp the accesses
 438      * here.
 439      *
 440      * If the length is small (as is the case for address_space_ldl/stl),
 441      * everything works fine.  If the incoming length is large, however,
 442      * the caller really has to do the clamping through memory_access_size.
 443      */
 444     if (memory_region_is_ram(mr)) {
 445         diff = int128_sub(section->size, int128_make64(addr));
 446         *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 447     }
 448     return section;
 449 }
 450
 451 /* Called from RCU critical section */
 452 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 453                                       hwaddr *xlat, hwaddr *plen,
 454                                       bool is_write)
 455 {
 456     IOMMUTLBEntry iotlb;
 457     MemoryRegionSection *section;
 458     MemoryRegion *mr;
 459
 460     for (;;) {
 461         AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 462         section = address_space_translate_internal(d, addr, &addr, plen, true);
 463         mr = section->mr;
 464
 465         if (!mr->iommu_ops) {
 466             break;
 467         }
 468
 469         iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 470         addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 471                 | (addr & iotlb.addr_mask));
 472         *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
 473         if (!(iotlb.perm & (1 << is_write))) {
 474             mr = &io_mem_unassigned;
 475             break;
 476         }
 477
 478         as = iotlb.target_as;
 479     }
 480
 481     if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 482         hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 483         *plen = MIN(page, *plen);
 484     }
 485
 486     *xlat = addr;
 487     return mr;
 488 }
 489
 490 /* Called from RCU critical section */
 491 MemoryRegionSection *
 492 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
 493                                   hwaddr *xlat, hwaddr *plen)
 494 {
 495     MemoryRegionSection *section;
 496     AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
 497
 498     section = address_space_translate_internal(d, addr, xlat, plen, false);
 499
 500     assert(!section->mr->iommu_ops);
 501     return section;
 502 }
 503 #endif
 504
 505 #if !defined(CONFIG_USER_ONLY)
 506
 507 static int cpu_common_post_load(void *opaque, int version_id)
 508 {
 509     CPUState *cpu = opaque;
 510
 511     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 512        version_id is increased. */
 513     cpu->interrupt_request &= ~0x01;
 514     tlb_flush(cpu, 1);
 515
 516     return 0;
 517 }
 518
 519 static int cpu_common_pre_load(void *opaque)
 520 {
 521     CPUState *cpu = opaque;
 522
 523     cpu->exception_index = -1;
 524
 525     return 0;
 526 }
 527
 528 static bool cpu_common_exception_index_needed(void *opaque)
 529 {
 530     CPUState *cpu = opaque;
 531
 532     return tcg_enabled() && cpu->exception_index != -1;
 533 }
 534
/* Optional subsection of vmstate_cpu_common carrying
 * CPUState.exception_index; included only when
 * cpu_common_exception_index_needed() returns true.
 */
static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
 545
 546 static bool cpu_common_crash_occurred_needed(void *opaque)
 547 {
 548     CPUState *cpu = opaque;
 549
 550     return cpu->crash_occurred;
 551 }
 552
/* Optional subsection of vmstate_cpu_common carrying
 * CPUState.crash_occurred; included only when the flag is set (see
 * cpu_common_crash_occurred_needed()).
 */
static const VMStateDescription vmstate_cpu_common_crash_occurred = {
    .name = "cpu_common/crash_occurred",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_crash_occurred_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(crash_occurred, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
 563
/* Migration description of the target-independent CPU state: the halted
 * and interrupt_request fields plus the two optional subsections above.
 * cpu_exec_realizefn() registers it for CPUs whose device has no vmsd of
 * its own.
 */
const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        &vmstate_cpu_common_crash_occurred,
        NULL
    }
};
 581
 582 #endif
 583
 584 CPUState *qemu_get_cpu(int index)
 585 {
 586     CPUState *cpu;
 587
 588     CPU_FOREACH(cpu) {
 589         if (cpu->cpu_index == index) {
 590             return cpu;
 591         }
 592     }
 593
 594     return NULL;
 595 }
 596
 597 #if !defined(CONFIG_USER_ONLY)
 598 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
 599 {
 600     CPUAddressSpace *newas;
 601
 602     /* Target code should have set num_ases before calling us */
 603     assert(asidx < cpu->num_ases);
 604
 605     if (asidx == 0) {
 606         /* address space 0 gets the convenience alias */
 607         cpu->as = as;
 608     }
 609
 610     /* KVM cannot currently support multiple address spaces. */
 611     assert(asidx == 0 || !kvm_enabled());
 612
 613     if (!cpu->cpu_ases) {
 614         cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
 615     }
 616
 617     newas = &cpu->cpu_ases[asidx];
 618     newas->cpu = cpu;
 619     newas->as = as;
 620     if (tcg_enabled()) {
 621         newas->tcg_as_listener.commit = tcg_commit;
 622         memory_listener_register(&newas->tcg_as_listener, as);
 623     }
 624 }
 625
 626 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
 627 {
 628     /* Return the AddressSpace corresponding to the specified index */
 629     return cpu->cpu_ases[asidx].as;
 630 }
 631 #endif
 632
 633 void cpu_exec_unrealizefn(CPUState *cpu)
 634 {
 635     CPUClass *cc = CPU_GET_CLASS(cpu);
 636
 637     cpu_list_remove(cpu);
 638
 639     if (cc->vmsd != NULL) {
 640         vmstate_unregister(NULL, cc->vmsd, cpu);
 641     }
 642     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 643         vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
 644     }
 645 }
 646
 647 void cpu_exec_initfn(CPUState *cpu)
 648 {
 649     cpu->as = NULL;
 650     cpu->num_ases = 0;
 651
 652 #ifndef CONFIG_USER_ONLY
 653     cpu->thread_id = qemu_get_thread_id();
 654
 655     /* This is a softmmu CPU object, so create a property for it
 656      * so users can wire up its memory. (This can't go in qom/cpu.c
 657      * because that file is compiled only once for both user-mode
 658      * and system builds.) The default if no link is set up is to use
 659      * the system address space.
 660      */
 661     object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
 662                              (Object **)&cpu->memory,
 663                              qdev_prop_allow_set_link_before_realize,
 664                              OBJ_PROP_LINK_UNREF_ON_RELEASE,
 665                              &error_abort);
 666     cpu->memory = system_memory;
 667     object_ref(OBJECT(cpu->memory));
 668 #endif
 669 }
 670
 671 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
 672 {
 673     CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
 674
 675     cpu_list_add(cpu);
 676
 677 #ifndef CONFIG_USER_ONLY
 678     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 679         vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
 680     }
 681     if (cc->vmsd != NULL) {
 682         vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
 683     }
 684 #endif
 685 }
 686
 687 #if defined(CONFIG_USER_ONLY)
 688 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 689 {
 690     mmap_lock();
 691     tb_lock();
 692     tb_invalidate_phys_page_range(pc, pc + 1, 0);
 693     tb_unlock();
 694     mmap_unlock();
 695 }
 696 #else
 697 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 698 {
 699     MemTxAttrs attrs;
 700     hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
 701     int asidx = cpu_asidx_from_attrs(cpu, attrs);
 702     if (phys != -1) {
 703         /* Locks grabbed by tb_invalidate_phys_addr */
 704         tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
 705                                 phys | (pc & ~TARGET_PAGE_MASK));
 706     }
 707 }
 708 #endif
 709
 710 #if defined(CONFIG_USER_ONLY)
 711 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 712
 713 {
 714 }
 715
 716 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 717                           int flags)
 718 {
 719     return -ENOSYS;
 720 }
 721
 722 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 723 {
 724 }
 725
 726 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 727                           int flags, CPUWatchpoint **watchpoint)
 728 {
 729     return -ENOSYS;
 730 }
 731 #else
 732 /* Add a watchpoint.  */
 733 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 734                           int flags, CPUWatchpoint **watchpoint)
 735 {
 736     CPUWatchpoint *wp;
 737
 738     /* forbid ranges which are empty or run off the end of the address space */
 739     if (len == 0 || (addr + len - 1) < addr) {
 740         error_report("tried to set invalid watchpoint at %"
 741                      VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 742         return -EINVAL;
 743     }
 744     wp = g_malloc(sizeof(*wp));
 745
 746     wp->vaddr = addr;
 747     wp->len = len;
 748     wp->flags = flags;
 749
 750     /* keep all GDB-injected watchpoints in front */
 751     if (flags & BP_GDB) {
 752         QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 753     } else {
 754         QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 755     }
 756
 757     tlb_flush_page(cpu, addr);
 758
 759     if (watchpoint)
 760         *watchpoint = wp;
 761     return 0;
 762 }
 763
 764 /* Remove a specific watchpoint.  */
 765 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 766                           int flags)
 767 {
 768     CPUWatchpoint *wp;
 769
 770     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 771         if (addr == wp->vaddr && len == wp->len
 772                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 773             cpu_watchpoint_remove_by_ref(cpu, wp);
 774             return 0;
 775         }
 776     }
 777     return -ENOENT;
 778 }
 779
 780 /* Remove a specific watchpoint by reference.  */
 781 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 782 {
 783     QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 784
 785     tlb_flush_page(cpu, watchpoint->vaddr);
 786
 787     g_free(watchpoint);
 788 }
 789
 790 /* Remove all matching watchpoints.  */
 791 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 792 {
 793     CPUWatchpoint *wp, *next;
 794
 795     QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 796         if (wp->flags & mask) {
 797             cpu_watchpoint_remove_by_ref(cpu, wp);
 798         }
 799     }
 800 }
 801
 802 /* Return true if this watchpoint address matches the specified
 803  * access (ie the address range covered by the watchpoint overlaps
 804  * partially or completely with the address range covered by the
 805  * access).
 806  */
 807 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 808                                                   vaddr addr,
 809                                                   vaddr len)
 810 {
 811     /* We know the lengths are non-zero, but a little caution is
 812      * required to avoid errors in the case where the range ends
 813      * exactly at the top of the address space and so addr + len
 814      * wraps round to zero.
 815      */
 816     vaddr wpend = wp->vaddr + wp->len - 1;
 817     vaddr addrend = addr + len - 1;
 818
 819     return !(addr > wpend || wp->vaddr > addrend);
 820 }
 821
 822 #endif
 823
 824 /* Add a breakpoint.  */
 825 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 826                           CPUBreakpoint **breakpoint)
 827 {
 828     CPUBreakpoint *bp;
 829
 830     bp = g_malloc(sizeof(*bp));
 831
 832     bp->pc = pc;
 833     bp->flags = flags;
 834
 835     /* keep all GDB-injected breakpoints in front */
 836     if (flags & BP_GDB) {
 837         QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 838     } else {
 839         QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 840     }
 841
 842     breakpoint_invalidate(cpu, pc);
 843
 844     if (breakpoint) {
 845         *breakpoint = bp;
 846     }
 847     return 0;
 848 }
 849
 850 /* Remove a specific breakpoint.  */
 851 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 852 {
 853     CPUBreakpoint *bp;
 854
 855     QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 856         if (bp->pc == pc && bp->flags == flags) {
 857             cpu_breakpoint_remove_by_ref(cpu, bp);
 858             return 0;
 859         }
 860     }
 861     return -ENOENT;
 862 }
 863
 864 /* Remove a specific breakpoint by reference.  */
 865 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 866 {
 867     QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 868
 869     breakpoint_invalidate(cpu, breakpoint->pc);
 870
 871     g_free(breakpoint);
 872 }
 873
 874 /* Remove all matching breakpoints. */
 875 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 876 {
 877     CPUBreakpoint *bp, *next;
 878
 879     QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 880         if (bp->flags & mask) {
 881             cpu_breakpoint_remove_by_ref(cpu, bp);
 882         }
 883     }
 884 }
 885
 886 /* enable or disable single step mode. EXCP_DEBUG is returned by the
 887    CPU loop after each instruction */
 888 void cpu_single_step(CPUState *cpu, int enabled)
 889 {
 890     if (cpu->singlestep_enabled != enabled) {
 891         cpu->singlestep_enabled = enabled;
 892         if (kvm_enabled()) {
 893             kvm_update_guest_debug(cpu, 0);
 894         } else {
 895             /* must flush all the translated code to avoid inconsistencies */
 896             /* XXX: only flush what is necessary */
 897             tb_flush(cpu);
 898         }
 899     }
 900 }
 901
 902 void cpu_abort(CPUState *cpu, const char *fmt, ...)
 903 {
 904     va_list ap;
 905     va_list ap2;
 906
 907     va_start(ap, fmt);
 908     va_copy(ap2, ap);
 909     fprintf(stderr, "qemu: fatal: ");
 910     vfprintf(stderr, fmt, ap);
 911     fprintf(stderr, "\n");
 912     cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 913     if (qemu_log_separate()) {
 914         qemu_log_lock();
 915         qemu_log("qemu: fatal: ");
 916         qemu_log_vprintf(fmt, ap2);
 917         qemu_log("\n");
 918         log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 919         qemu_log_flush();
 920         qemu_log_unlock();
 921         qemu_log_close();
 922     }
 923     va_end(ap2);
 924     va_end(ap);
 925     replay_finish();
 926 #if defined(CONFIG_USER_ONLY)
 927     {
 928         struct sigaction act;
 929         sigfillset(&act.sa_mask);
 930         act.sa_handler = SIG_DFL;
 931         sigaction(SIGABRT, &act, NULL);
 932     }
 933 #endif
 934     abort();
 935 }
 936
 937 #if !defined(CONFIG_USER_ONLY)
 938 /* Called from RCU critical section */
 939 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 940 {
 941     RAMBlock *block;
 942
 943     block = atomic_rcu_read(&ram_list.mru_block);
 944     if (block && addr - block->offset < block->max_length) {
 945         return block;
 946     }
 947     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 948         if (addr - block->offset < block->max_length) {
 949             goto found;
 950         }
 951     }
 952
 953     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
 954     abort();
 955
 956 found:
 957     /* It is safe to write mru_block outside the iothread lock.  This
 958      * is what happens:
 959      *
 960      *     mru_block = xxx
 961      *     rcu_read_unlock()
 962      *                                        xxx removed from list
 963      *                  rcu_read_lock()
 964      *                  read mru_block
 965      *                                        mru_block = NULL;
 966      *                                        call_rcu(reclaim_ramblock, xxx);
 967      *                  rcu_read_unlock()
 968      *
 969      * atomic_rcu_set is not needed here.  The block was already published
 970      * when it was placed into the list.  Here we're just making an extra
 971      * copy of the pointer.
 972      */
 973     ram_list.mru_block = block;
 974     return block;
 975 }
 976
 977 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 978 {
 979     CPUState *cpu;
 980     ram_addr_t start1;
 981     RAMBlock *block;
 982     ram_addr_t end;
 983
 984     end = TARGET_PAGE_ALIGN(start + length);
 985     start &= TARGET_PAGE_MASK;
 986
 987     rcu_read_lock();
 988     block = qemu_get_ram_block(start);
 989     assert(block == qemu_get_ram_block(end - 1));
 990     start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
 991     CPU_FOREACH(cpu) {
 992         tlb_reset_dirty(cpu, start1, length);
 993     }
 994     rcu_read_unlock();
 995 }
 996
 997 /* Note: start and end must be within the same ram block.  */
 998 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
 999                                               ram_addr_t length,
1000                                               unsigned client)
1001 {
1002     DirtyMemoryBlocks *blocks;
1003     unsigned long end, page;
1004     bool dirty = false;
1005
1006     if (length == 0) {
1007         return false;
1008     }
1009
1010     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1011     page = start >> TARGET_PAGE_BITS;
1012
1013     rcu_read_lock();
1014
1015     blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1016
1017     while (page < end) {
1018         unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1019         unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1020         unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1021
1022         dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1023                                               offset, num);
1024         page += num;
1025     }
1026
1027     rcu_read_unlock();
1028
1029     if (dirty && tcg_enabled()) {
1030         tlb_reset_dirty_range_all(start, length);
1031     }
1032
1033     return dirty;
1034 }
1035
1036 /* Called from RCU critical section */
1037 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1038                                        MemoryRegionSection *section,
1039                                        target_ulong vaddr,
1040                                        hwaddr paddr, hwaddr xlat,
1041                                        int prot,
1042                                        target_ulong *address)
1043 {
1044     hwaddr iotlb;
1045     CPUWatchpoint *wp;
1046
1047     if (memory_region_is_ram(section->mr)) {
1048         /* Normal RAM.  */
1049         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1050         if (!section->readonly) {
1051             iotlb |= PHYS_SECTION_NOTDIRTY;
1052         } else {
1053             iotlb |= PHYS_SECTION_ROM;
1054         }
1055     } else {
1056         AddressSpaceDispatch *d;
1057
1058         d = atomic_rcu_read(&section->address_space->dispatch);
1059         iotlb = section - d->map.sections;
1060         iotlb += xlat;
1061     }
1062
1063     /* Make accesses to pages with watchpoints go via the
1064        watchpoint trap routines.  */
1065     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1066         if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1067             /* Avoid trapping reads of pages with a write breakpoint. */
1068             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1069                 iotlb = PHYS_SECTION_WATCH + paddr;
1070                 *address |= TLB_MMIO;
1071                 break;
1072             }
1073         }
1074     }
1075
1076     return iotlb;
1077 }
#endif /* !defined(CONFIG_USER_ONLY) */
1079
1080 #if !defined(CONFIG_USER_ONLY)
1081
/* Forward declarations of the subpage helpers used by register_subpage(). */
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

/* Allocator called to obtain host memory for a RAM block that has none
 * yet; qemu_anon_ram_alloc unless replaced via phys_mem_set_alloc().
 */
static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                               qemu_anon_ram_alloc;
1088
/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 *
 * @alloc replaces the function stored in phys_mem_alloc; no check is
 * made on the pointer.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}
1098
1099 static uint16_t phys_section_add(PhysPageMap *map,
1100                                  MemoryRegionSection *section)
1101 {
1102     /* The physical section number is ORed with a page-aligned
1103      * pointer to produce the iotlb entries.  Thus it should
1104      * never overflow into the page-aligned value.
1105      */
1106     assert(map->sections_nb < TARGET_PAGE_SIZE);
1107
1108     if (map->sections_nb == map->sections_nb_alloc) {
1109         map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1110         map->sections = g_renew(MemoryRegionSection, map->sections,
1111                                 map->sections_nb_alloc);
1112     }
1113     map->sections[map->sections_nb] = *section;
1114     memory_region_ref(section->mr);
1115     return map->sections_nb++;
1116 }
1117
1118 static void phys_section_destroy(MemoryRegion *mr)
1119 {
1120     bool have_sub_page = mr->subpage;
1121
1122     memory_region_unref(mr);
1123
1124     if (have_sub_page) {
1125         subpage_t *subpage = container_of(mr, subpage_t, iomem);
1126         object_unref(OBJECT(&subpage->iomem));
1127         g_free(subpage);
1128     }
1129 }
1130
1131 static void phys_sections_free(PhysPageMap *map)
1132 {
1133     while (map->sections_nb > 0) {
1134         MemoryRegionSection *section = &map->sections[--map->sections_nb];
1135         phys_section_destroy(section->mr);
1136     }
1137     g_free(map->sections);
1138     g_free(map->nodes);
1139 }
1140
1141 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1142 {
1143     subpage_t *subpage;
1144     hwaddr base = section->offset_within_address_space
1145         & TARGET_PAGE_MASK;
1146     MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1147                                                    d->map.nodes, d->map.sections);
1148     MemoryRegionSection subsection = {
1149         .offset_within_address_space = base,
1150         .size = int128_make64(TARGET_PAGE_SIZE),
1151     };
1152     hwaddr start, end;
1153
1154     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1155
1156     if (!(existing->mr->subpage)) {
1157         subpage = subpage_init(d->as, base);
1158         subsection.address_space = d->as;
1159         subsection.mr = &subpage->iomem;
1160         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1161                       phys_section_add(&d->map, &subsection));
1162     } else {
1163         subpage = container_of(existing->mr, subpage_t, iomem);
1164     }
1165     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1166     end = start + int128_get64(section->size) - 1;
1167     subpage_register(subpage, start, end,
1168                      phys_section_add(&d->map, section));
1169 }
1170
1171
1172 static void register_multipage(AddressSpaceDispatch *d,
1173                                MemoryRegionSection *section)
1174 {
1175     hwaddr start_addr = section->offset_within_address_space;
1176     uint16_t section_index = phys_section_add(&d->map, section);
1177     uint64_t num_pages = int128_get64(int128_rshift(section->size,
1178                                                     TARGET_PAGE_BITS));
1179
1180     assert(num_pages);
1181     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1182 }
1183
1184 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1185 {
1186     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1187     AddressSpaceDispatch *d = as->next_dispatch;
1188     MemoryRegionSection now = *section, remain = *section;
1189     Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1190
1191     if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1192         uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1193                        - now.offset_within_address_space;
1194
1195         now.size = int128_min(int128_make64(left), now.size);
1196         register_subpage(d, &now);
1197     } else {
1198         now.size = int128_zero();
1199     }
1200     while (int128_ne(remain.size, now.size)) {
1201         remain.size = int128_sub(remain.size, now.size);
1202         remain.offset_within_address_space += int128_get64(now.size);
1203         remain.offset_within_region += int128_get64(now.size);
1204         now = remain;
1205         if (int128_lt(remain.size, page_size)) {
1206             register_subpage(d, &now);
1207         } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1208             now.size = page_size;
1209             register_subpage(d, &now);
1210         } else {
1211             now.size = int128_and(now.size, int128_neg(page_size));
1212             register_multipage(d, &now);
1213         }
1214     }
1215 }
1216
/* Flush the KVM coalesced MMIO buffer; does nothing when KVM is not
 * enabled.
 */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1222
/* Acquire ram_list.mutex. */
void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}
1227
/* Release ram_list.mutex. */
void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}
1232
1233 #ifdef __linux__
1234 static int64_t get_file_size(int fd)
1235 {
1236     int64_t size = lseek(fd, 0, SEEK_END);
1237     if (size < 0) {
1238         return -errno;
1239     }
1240     return size;
1241 }
1242
1243 static void *file_ram_alloc(RAMBlock *block,
1244                             ram_addr_t memory,
1245                             const char *path,
1246                             Error **errp)
1247 {
1248     bool unlink_on_error = false;
1249     char *filename;
1250     char *sanitized_name;
1251     char *c;
1252     void *area = MAP_FAILED;
1253     int fd = -1;
1254     int64_t file_size;
1255
1256     if (kvm_enabled() && !kvm_has_sync_mmu()) {
1257         error_setg(errp,
1258                    "host lacks kvm mmu notifiers, -mem-path unsupported");
1259         return NULL;
1260     }
1261
1262     for (;;) {
1263         fd = open(path, O_RDWR);
1264         if (fd >= 0) {
1265             /* @path names an existing file, use it */
1266             break;
1267         }
1268         if (errno == ENOENT) {
1269             /* @path names a file that doesn't exist, create it */
1270             fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1271             if (fd >= 0) {
1272                 unlink_on_error = true;
1273                 break;
1274             }
1275         } else if (errno == EISDIR) {
1276             /* @path names a directory, create a file there */
1277             /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1278             sanitized_name = g_strdup(memory_region_name(block->mr));
1279             for (c = sanitized_name; *c != '\0'; c++) {
1280                 if (*c == '/') {
1281                     *c = '_';
1282                 }
1283             }
1284
1285             filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1286                                        sanitized_name);
1287             g_free(sanitized_name);
1288
1289             fd = mkstemp(filename);
1290             if (fd >= 0) {
1291                 unlink(filename);
1292                 g_free(filename);
1293                 break;
1294             }
1295             g_free(filename);
1296         }
1297         if (errno != EEXIST && errno != EINTR) {
1298             error_setg_errno(errp, errno,
1299                              "can't open backing store %s for guest RAM",
1300                              path);
1301             goto error;
1302         }
1303         /*
1304          * Try again on EINTR and EEXIST.  The latter happens when
1305          * something else creates the file between our two open().
1306          */
1307     }
1308
1309     block->page_size = qemu_fd_getpagesize(fd);
1310     block->mr->align = block->page_size;
1311 #if defined(__s390x__)
1312     if (kvm_enabled()) {
1313         block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1314     }
1315 #endif
1316
1317     file_size = get_file_size(fd);
1318
1319     if (memory < block->page_size) {
1320         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1321                    "or larger than page size 0x%zx",
1322                    memory, block->page_size);
1323         goto error;
1324     }
1325
1326     if (file_size > 0 && file_size < memory) {
1327         error_setg(errp, "backing store %s size 0x%" PRIx64
1328                    " does not match 'size' option 0x" RAM_ADDR_FMT,
1329                    path, file_size, memory);
1330         goto error;
1331     }
1332
1333     memory = ROUND_UP(memory, block->page_size);
1334
1335     /*
1336      * ftruncate is not supported by hugetlbfs in older
1337      * hosts, so don't bother bailing out on errors.
1338      * If anything goes wrong with it under other filesystems,
1339      * mmap will fail.
1340      *
1341      * Do not truncate the non-empty backend file to avoid corrupting
1342      * the existing data in the file. Disabling shrinking is not
1343      * enough. For example, the current vNVDIMM implementation stores
1344      * the guest NVDIMM labels at the end of the backend file. If the
1345      * backend file is later extended, QEMU will not be able to find
1346      * those labels. Therefore, extending the non-empty backend file
1347      * is disabled as well.
1348      */
1349     if (!file_size && ftruncate(fd, memory)) {
1350         perror("ftruncate");
1351     }
1352
1353     area = qemu_ram_mmap(fd, memory, block->mr->align,
1354                          block->flags & RAM_SHARED);
1355     if (area == MAP_FAILED) {
1356         error_setg_errno(errp, errno,
1357                          "unable to map backing store for guest RAM");
1358         goto error;
1359     }
1360
1361     if (mem_prealloc) {
1362         os_mem_prealloc(fd, area, memory, errp);
1363         if (errp && *errp) {
1364             goto error;
1365         }
1366     }
1367
1368     block->fd = fd;
1369     return area;
1370
1371 error:
1372     if (area != MAP_FAILED) {
1373         qemu_ram_munmap(area, memory);
1374     }
1375     if (unlink_on_error) {
1376         unlink(path);
1377     }
1378     if (fd != -1) {
1379         close(fd);
1380     }
1381     return NULL;
1382 }
1383 #endif
1384
1385 /* Called with the ramlist lock held.  */
1386 static ram_addr_t find_ram_offset(ram_addr_t size)
1387 {
1388     RAMBlock *block, *next_block;
1389     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1390
1391     assert(size != 0); /* it would hand out same offset multiple times */
1392
1393     if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1394         return 0;
1395     }
1396
1397     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1398         ram_addr_t end, next = RAM_ADDR_MAX;
1399
1400         end = block->offset + block->max_length;
1401
1402         QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1403             if (next_block->offset >= end) {
1404                 next = MIN(next, next_block->offset);
1405             }
1406         }
1407         if (next - end >= size && next - end < mingap) {
1408             offset = end;
1409             mingap = next - end;
1410         }
1411     }
1412
1413     if (offset == RAM_ADDR_MAX) {
1414         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1415                 (uint64_t)size);
1416         abort();
1417     }
1418
1419     return offset;
1420 }
1421
1422 ram_addr_t last_ram_offset(void)
1423 {
1424     RAMBlock *block;
1425     ram_addr_t last = 0;
1426
1427     rcu_read_lock();
1428     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1429         last = MAX(last, block->offset + block->max_length);
1430     }
1431     rcu_read_unlock();
1432     return last;
1433 }
1434
1435 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1436 {
1437     int ret;
1438
1439     /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1440     if (!machine_dump_guest_core(current_machine)) {
1441         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1442         if (ret) {
1443             perror("qemu_madvise");
1444             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1445                             "but dump_guest_core=off specified\n");
1446         }
1447     }
1448 }
1449
/* Return @rb's id string.  The pointer refers to storage inside @rb. */
const char *qemu_ram_get_idstr(RAMBlock *rb)
{
    return rb->idstr;
}
1454
1455 /* Called with iothread lock held.  */
1456 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1457 {
1458     RAMBlock *block;
1459
1460     assert(new_block);
1461     assert(!new_block->idstr[0]);
1462
1463     if (dev) {
1464         char *id = qdev_get_dev_path(dev);
1465         if (id) {
1466             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1467             g_free(id);
1468         }
1469     }
1470     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1471
1472     rcu_read_lock();
1473     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1474         if (block != new_block &&
1475             !strcmp(block->idstr, new_block->idstr)) {
1476             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1477                     new_block->idstr);
1478             abort();
1479         }
1480     }
1481     rcu_read_unlock();
1482 }
1483
1484 /* Called with iothread lock held.  */
1485 void qemu_ram_unset_idstr(RAMBlock *block)
1486 {
1487     /* FIXME: arch_init.c assumes that this is not called throughout
1488      * migration.  Ignore the problem since hot-unplug during migration
1489      * does not work anyway.
1490      */
1491     if (block) {
1492         memset(block->idstr, 0, sizeof(block->idstr));
1493     }
1494 }
1495
/* Return the page size recorded in @rb when the block was created. */
size_t qemu_ram_pagesize(RAMBlock *rb)
{
    return rb->page_size;
}
1500
1501 static int memory_try_enable_merging(void *addr, size_t len)
1502 {
1503     if (!machine_mem_merge(current_machine)) {
1504         /* disabled by the user */
1505         return 0;
1506     }
1507
1508     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1509 }
1510
1511 /* Only legal before guest might have detected the memory size: e.g. on
1512  * incoming migration, or right after reset.
1513  *
1514  * As memory core doesn't know how is memory accessed, it is up to
1515  * resize callback to update device state and/or add assertions to detect
1516  * misuse, if necessary.
1517  */
1518 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1519 {
1520     assert(block);
1521
1522     newsize = HOST_PAGE_ALIGN(newsize);
1523
1524     if (block->used_length == newsize) {
1525         return 0;
1526     }
1527
1528     if (!(block->flags & RAM_RESIZEABLE)) {
1529         error_setg_errno(errp, EINVAL,
1530                          "Length mismatch: %s: 0x" RAM_ADDR_FMT
1531                          " in != 0x" RAM_ADDR_FMT, block->idstr,
1532                          newsize, block->used_length);
1533         return -EINVAL;
1534     }
1535
1536     if (block->max_length < newsize) {
1537         error_setg_errno(errp, EINVAL,
1538                          "Length too large: %s: 0x" RAM_ADDR_FMT
1539                          " > 0x" RAM_ADDR_FMT, block->idstr,
1540                          newsize, block->max_length);
1541         return -EINVAL;
1542     }
1543
1544     cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1545     block->used_length = newsize;
1546     cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1547                                         DIRTY_CLIENTS_ALL);
1548     memory_region_set_size(block->mr, newsize);
1549     if (block->resized) {
1550         block->resized(block->idstr, newsize, block->host);
1551     }
1552     return 0;
1553 }
1554
1555 /* Called with ram_list.mutex held */
1556 static void dirty_memory_extend(ram_addr_t old_ram_size,
1557                                 ram_addr_t new_ram_size)
1558 {
1559     ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1560                                              DIRTY_MEMORY_BLOCK_SIZE);
1561     ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1562                                              DIRTY_MEMORY_BLOCK_SIZE);
1563     int i;
1564
1565     /* Only need to extend if block count increased */
1566     if (new_num_blocks <= old_num_blocks) {
1567         return;
1568     }
1569
1570     for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1571         DirtyMemoryBlocks *old_blocks;
1572         DirtyMemoryBlocks *new_blocks;
1573         int j;
1574
1575         old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1576         new_blocks = g_malloc(sizeof(*new_blocks) +
1577                               sizeof(new_blocks->blocks[0]) * new_num_blocks);
1578
1579         if (old_num_blocks) {
1580             memcpy(new_blocks->blocks, old_blocks->blocks,
1581                    old_num_blocks * sizeof(old_blocks->blocks[0]));
1582         }
1583
1584         for (j = old_num_blocks; j < new_num_blocks; j++) {
1585             new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1586         }
1587
1588         atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1589
1590         if (old_blocks) {
1591             g_free_rcu(old_blocks, rcu);
1592         }
1593     }
1594 }
1595
1596 static void ram_block_add(RAMBlock *new_block, Error **errp)
1597 {
1598     RAMBlock *block;
1599     RAMBlock *last_block = NULL;
1600     ram_addr_t old_ram_size, new_ram_size;
1601     Error *err = NULL;
1602
1603     old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1604
1605     qemu_mutex_lock_ramlist();
1606     new_block->offset = find_ram_offset(new_block->max_length);
1607
1608     if (!new_block->host) {
1609         if (xen_enabled()) {
1610             xen_ram_alloc(new_block->offset, new_block->max_length,
1611                           new_block->mr, &err);
1612             if (err) {
1613                 error_propagate(errp, err);
1614                 qemu_mutex_unlock_ramlist();
1615                 return;
1616             }
1617         } else {
1618             new_block->host = phys_mem_alloc(new_block->max_length,
1619                                              &new_block->mr->align);
1620             if (!new_block->host) {
1621                 error_setg_errno(errp, errno,
1622                                  "cannot set up guest memory '%s'",
1623                                  memory_region_name(new_block->mr));
1624                 qemu_mutex_unlock_ramlist();
1625                 return;
1626             }
1627             memory_try_enable_merging(new_block->host, new_block->max_length);
1628         }
1629     }
1630
1631     new_ram_size = MAX(old_ram_size,
1632               (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1633     if (new_ram_size > old_ram_size) {
1634         migration_bitmap_extend(old_ram_size, new_ram_size);
1635         dirty_memory_extend(old_ram_size, new_ram_size);
1636     }
1637     /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1638      * QLIST (which has an RCU-friendly variant) does not have insertion at
1639      * tail, so save the last element in last_block.
1640      */
1641     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1642         last_block = block;
1643         if (block->max_length < new_block->max_length) {
1644             break;
1645         }
1646     }
1647     if (block) {
1648         QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1649     } else if (last_block) {
1650         QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1651     } else { /* list is empty */
1652         QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1653     }
1654     ram_list.mru_block = NULL;
1655
1656     /* Write list before version */
1657     smp_wmb();
1658     ram_list.version++;
1659     qemu_mutex_unlock_ramlist();
1660
1661     cpu_physical_memory_set_dirty_range(new_block->offset,
1662                                         new_block->used_length,
1663                                         DIRTY_CLIENTS_ALL);
1664
1665     if (new_block->host) {
1666         qemu_ram_setup_dump(new_block->host, new_block->max_length);
1667         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1668         /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1669         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1670     }
1671 }
1672
1673 #ifdef __linux__
1674 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1675                                    bool share, const char *mem_path,
1676                                    Error **errp)
1677 {
1678     RAMBlock *new_block;
1679     Error *local_err = NULL;
1680
1681     if (xen_enabled()) {
1682         error_setg(errp, "-mem-path not supported with Xen");
1683         return NULL;
1684     }
1685
1686     if (phys_mem_alloc != qemu_anon_ram_alloc) {
1687         /*
1688          * file_ram_alloc() needs to allocate just like
1689          * phys_mem_alloc, but we haven't bothered to provide
1690          * a hook there.
1691          */
1692         error_setg(errp,
1693                    "-mem-path not supported with this accelerator");
1694         return NULL;
1695     }
1696
1697     size = HOST_PAGE_ALIGN(size);
1698     new_block = g_malloc0(sizeof(*new_block));
1699     new_block->mr = mr;
1700     new_block->used_length = size;
1701     new_block->max_length = size;
1702     new_block->flags = share ? RAM_SHARED : 0;
1703     new_block->host = file_ram_alloc(new_block, size,
1704                                      mem_path, errp);
1705     if (!new_block->host) {
1706         g_free(new_block);
1707         return NULL;
1708     }
1709
1710     ram_block_add(new_block, &local_err);
1711     if (local_err) {
1712         g_free(new_block);
1713         error_propagate(errp, local_err);
1714         return NULL;
1715     }
1716     return new_block;
1717 }
1718 #endif
1719
1720 static
1721 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1722                                   void (*resized)(const char*,
1723                                                   uint64_t length,
1724                                                   void *host),
1725                                   void *host, bool resizeable,
1726                                   MemoryRegion *mr, Error **errp)
1727 {
1728     RAMBlock *new_block;
1729     Error *local_err = NULL;
1730
1731     size = HOST_PAGE_ALIGN(size);
1732     max_size = HOST_PAGE_ALIGN(max_size);
1733     new_block = g_malloc0(sizeof(*new_block));
1734     new_block->mr = mr;
1735     new_block->resized = resized;
1736     new_block->used_length = size;
1737     new_block->max_length = max_size;
1738     assert(max_size >= size);
1739     new_block->fd = -1;
1740     new_block->page_size = getpagesize();
1741     new_block->host = host;
1742     if (host) {
1743         new_block->flags |= RAM_PREALLOC;
1744     }
1745     if (resizeable) {
1746         new_block->flags |= RAM_RESIZEABLE;
1747     }
1748     ram_block_add(new_block, &local_err);
1749     if (local_err) {
1750         g_free(new_block);
1751         error_propagate(errp, local_err);
1752         return NULL;
1753     }
1754     return new_block;
1755 }
1756
/* Create a fixed-size RAMBlock of @size bytes for @mr on top of the
 * caller-provided memory at @host.  Returns NULL with @errp set on
 * failure.
 */
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}
1762
/* Create a fixed-size RAMBlock of @size bytes for @mr; no host pointer
 * is supplied, so memory is allocated when the block is added.  Returns
 * NULL with @errp set on failure.
 */
RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}
1767
/* Create a resizeable RAMBlock for @mr with initial size @size and
 * maximum size @maxsz.  @resized is stored as the block's resize
 * callback.  Returns NULL with @errp set on failure.
 */
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                     void (*resized)(const char*,
                                                     uint64_t length,
                                                     void *host),
                                     MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
}
1776
1777 static void reclaim_ramblock(RAMBlock *block)
1778 {
1779     if (block->flags & RAM_PREALLOC) {
1780         ;
1781     } else if (xen_enabled()) {
1782         xen_invalidate_map_cache_entry(block->host);
1783 #ifndef _WIN32
1784     } else if (block->fd >= 0) {
1785         qemu_ram_munmap(block->host, block->max_length);
1786         close(block->fd);
1787 #endif
1788     } else {
1789         qemu_anon_ram_free(block->host, block->max_length);
1790     }
1791     g_free(block);
1792 }
1793
1794 void qemu_ram_free(RAMBlock *block)
1795 {
1796     if (!block) {
1797         return;
1798     }
1799
1800     qemu_mutex_lock_ramlist();
1801     QLIST_REMOVE_RCU(block, next);
1802     ram_list.mru_block = NULL;
1803     /* Write list before version */
1804     smp_wmb();
1805     ram_list.version++;
1806     call_rcu(block, reclaim_ramblock, rcu);
1807     qemu_mutex_unlock_ramlist();
1808 }
1809
1810 #ifndef _WIN32
1811 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1812 {
1813     RAMBlock *block;
1814     ram_addr_t offset;
1815     int flags;
1816     void *area, *vaddr;
1817
1818     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1819         offset = addr - block->offset;
1820         if (offset < block->max_length) {
1821             vaddr = ramblock_ptr(block, offset);
1822             if (block->flags & RAM_PREALLOC) {
1823                 ;
1824             } else if (xen_enabled()) {
1825                 abort();
1826             } else {
1827                 flags = MAP_FIXED;
1828                 if (block->fd >= 0) {
1829                     flags |= (block->flags & RAM_SHARED ?
1830                               MAP_SHARED : MAP_PRIVATE);
1831                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1832                                 flags, block->fd, offset);
1833                 } else {
1834                     /*
1835                      * Remap needs to match alloc.  Accelerators that
1836                      * set phys_mem_alloc never remap.  If they did,
1837                      * we'd need a remap hook here.
1838                      */
1839                     assert(phys_mem_alloc == qemu_anon_ram_alloc);
1840
1841                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1842                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1843                                 flags, -1, 0);
1844                 }
1845                 if (area != vaddr) {
1846                     fprintf(stderr, "Could not remap addr: "
1847                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1848                             length, addr);
1849                     exit(1);
1850                 }
1851                 memory_try_enable_merging(vaddr, length);
1852                 qemu_ram_setup_dump(vaddr, length);
1853             }
1854         }
1855     }
1856 }
1857 #endif /* !_WIN32 */
1858
1859 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1860  * This should not be used for general purpose DMA.  Use address_space_map
1861  * or address_space_rw instead. For local memory (e.g. video ram) that the
1862  * device owns, use memory_region_get_ram_ptr.
1863  *
1864  * Called within RCU critical section.
1865  */
1866 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1867 {
1868     RAMBlock *block = ram_block;
1869
1870     if (block == NULL) {
1871         block = qemu_get_ram_block(addr);
1872         addr -= block->offset;
1873     }
1874
1875     if (xen_enabled() && block->host == NULL) {
1876         /* We need to check if the requested address is in the RAM
1877          * because we don't want to map the entire memory in QEMU.
1878          * In that case just map until the end of the page.
1879          */
1880         if (block->offset == 0) {
1881             return xen_map_cache(addr, 0, 0);
1882         }
1883
1884         block->host = xen_map_cache(block->offset, block->max_length, 1);
1885     }
1886     return ramblock_ptr(block, addr);
1887 }
1888
1889 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1890  * but takes a size argument.
1891  *
1892  * Called within RCU critical section.
1893  */
1894 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1895                                  hwaddr *size)
1896 {
1897     RAMBlock *block = ram_block;
1898     if (*size == 0) {
1899         return NULL;
1900     }
1901
1902     if (block == NULL) {
1903         block = qemu_get_ram_block(addr);
1904         addr -= block->offset;
1905     }
1906     *size = MIN(*size, block->max_length - addr);
1907
1908     if (xen_enabled() && block->host == NULL) {
1909         /* We need to check if the requested address is in the RAM
1910          * because we don't want to map the entire memory in QEMU.
1911          * In that case just map the requested area.
1912          */
1913         if (block->offset == 0) {
1914             return xen_map_cache(addr, *size, 1);
1915         }
1916
1917         block->host = xen_map_cache(block->offset, block->max_length, 1);
1918     }
1919
1920     return ramblock_ptr(block, addr);
1921 }
1922
1923 /*
1924  * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1925  * in that RAMBlock.
1926  *
1927  * ptr: Host pointer to look up
1928  * round_offset: If true round the result offset down to a page boundary
1929  * *ram_addr: set to result ram_addr
1930  * *offset: set to result offset within the RAMBlock
1931  *
1932  * Returns: RAMBlock (or NULL if not found)
1933  *
1934  * By the time this function returns, the returned pointer is not protected
1935  * by RCU anymore.  If the caller is not within an RCU critical section and
1936  * does not hold the iothread lock, it must have other means of protecting the
1937  * pointer, such as a reference to the region that includes the incoming
1938  * ram_addr_t.
1939  */
1940 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1941                                    ram_addr_t *offset)
1942 {
1943     RAMBlock *block;
1944     uint8_t *host = ptr;
1945
1946     if (xen_enabled()) {
1947         ram_addr_t ram_addr;
1948         rcu_read_lock();
1949         ram_addr = xen_ram_addr_from_mapcache(ptr);
1950         block = qemu_get_ram_block(ram_addr);
1951         if (block) {
1952             *offset = ram_addr - block->offset;
1953         }
1954         rcu_read_unlock();
1955         return block;
1956     }
1957
1958     rcu_read_lock();
1959     block = atomic_rcu_read(&ram_list.mru_block);
1960     if (block && block->host && host - block->host < block->max_length) {
1961         goto found;
1962     }
1963
1964     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1965         /* This case append when the block is not mapped. */
1966         if (block->host == NULL) {
1967             continue;
1968         }
1969         if (host - block->host < block->max_length) {
1970             goto found;
1971         }
1972     }
1973
1974     rcu_read_unlock();
1975     return NULL;
1976
1977 found:
1978     *offset = (host - block->host);
1979     if (round_offset) {
1980         *offset &= TARGET_PAGE_MASK;
1981     }
1982     rcu_read_unlock();
1983     return block;
1984 }
1985
1986 /*
1987  * Finds the named RAMBlock
1988  *
1989  * name: The name of RAMBlock to find
1990  *
1991  * Returns: RAMBlock (or NULL if not found)
1992  */
1993 RAMBlock *qemu_ram_block_by_name(const char *name)
1994 {
1995     RAMBlock *block;
1996
1997     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1998         if (!strcmp(name, block->idstr)) {
1999             return block;
2000         }
2001     }
2002
2003     return NULL;
2004 }
2005
2006 /* Some of the softmmu routines need to translate from a host pointer
2007    (typically a TLB entry) back to a ram offset.  */
2008 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2009 {
2010     RAMBlock *block;
2011     ram_addr_t offset;
2012
2013     block = qemu_ram_block_from_host(ptr, false, &offset);
2014     if (!block) {
2015         return RAM_ADDR_INVALID;
2016     }
2017
2018     return block->offset + offset;
2019 }
2020
2021 /* Called within RCU critical section.  */
2022 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2023                                uint64_t val, unsigned size)
2024 {
2025     bool locked = false;
2026
2027     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2028         locked = true;
2029         tb_lock();
2030         tb_invalidate_phys_page_fast(ram_addr, size);
2031     }
2032     switch (size) {
2033     case 1:
2034         stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2035         break;
2036     case 2:
2037         stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2038         break;
2039     case 4:
2040         stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2041         break;
2042     default:
2043         abort();
2044     }
2045
2046     if (locked) {
2047         tb_unlock();
2048     }
2049
2050     /* Set both VGA and migration bits for simplicity and to remove
2051      * the notdirty callback faster.
2052      */
2053     cpu_physical_memory_set_dirty_range(ram_addr, size,
2054                                         DIRTY_CLIENTS_NOCODE);
2055     /* we remove the notdirty callback only if the code has been
2056        flushed */
2057     if (!cpu_physical_memory_is_clean(ram_addr)) {
2058         tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2059     }
2060 }
2061
/*
 * .valid.accepts hook of notdirty_mem_ops: only writes are accepted,
 * whatever the address or size (the ops table provides no read handler).
 */
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}
2067
2068 static const MemoryRegionOps notdirty_mem_ops = {
2069     .write = notdirty_mem_write,
2070     .valid.accepts = notdirty_mem_accepts,
2071     .endianness = DEVICE_NATIVE_ENDIAN,
2072 };
2073
/*
 * Generate a debug exception if a watchpoint has been hit.
 *
 * @offset: offset of the access within the page of current_cpu->mem_io_vaddr
 * @len: length of the access in bytes
 * @attrs: transaction attributes, recorded in the watchpoint that hits
 * @flags: kind of access, compared against BP_MEM_READ to choose between
 *         the HIT_READ and HIT_WRITE marks
 *
 * When a watchpoint fires for the first time this function does not
 * return: it leaves through cpu_loop_exit() or cpu_loop_exit_noexc().
 */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
{
    CPUState *cpu = current_cpu;
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    uint32_t cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    /* Rebuild the full virtual address from the page base and the offset. */
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            /* Record what kind of access hit, and where. */
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            wp->hitattrs = attrs;
            if (!cpu->watchpoint_hit) {
                /* CPU-owned watchpoints can be vetoed by the CPU class. */
                if (wp->flags & BP_CPU &&
                    !cc->debug_check_watchpoint(cpu, wp)) {
                    wp->flags &= ~BP_WATCHPOINT_HIT;
                    continue;
                }
                cpu->watchpoint_hit = wp;

                /* The tb_lock will be reset when cpu_loop_exit or
                 * cpu_loop_exit_noexc longjmp back into the cpu_exec
                 * main loop.
                 */
                tb_lock();
                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    /* Stop now, before the access is performed. */
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    /* Generate new code for the current CPU state and
                     * restart; the check is then re-entered with
                     * watchpoint_hit set (see the top of this function).
                     */
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_loop_exit_noexc(cpu);
                }
            }
        } else {
            /* Not matching this access: clear any stale hit marks. */
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
2131
2132 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2133    so these check for a hit then pass through to the normal out-of-line
2134    phys routines.  */
2135 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2136                                   unsigned size, MemTxAttrs attrs)
2137 {
2138     MemTxResult res;
2139     uint64_t data;
2140     int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2141     AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2142
2143     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2144     switch (size) {
2145     case 1:
2146         data = address_space_ldub(as, addr, attrs, &res);
2147         break;
2148     case 2:
2149         data = address_space_lduw(as, addr, attrs, &res);
2150         break;
2151     case 4:
2152         data = address_space_ldl(as, addr, attrs, &res);
2153         break;
2154     default: abort();
2155     }
2156     *pdata = data;
2157     return res;
2158 }
2159
2160 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2161                                    uint64_t val, unsigned size,
2162                                    MemTxAttrs attrs)
2163 {
2164     MemTxResult res;
2165     int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2166     AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2167
2168     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2169     switch (size) {
2170     case 1:
2171         address_space_stb(as, addr, val, attrs, &res);
2172         break;
2173     case 2:
2174         address_space_stw(as, addr, val, attrs, &res);
2175         break;
2176     case 4:
2177         address_space_stl(as, addr, val, attrs, &res);
2178         break;
2179     default: abort();
2180     }
2181     return res;
2182 }
2183
/*
 * Ops for the watchpoint region (io_mem_watch): both directions go through
 * the attrs-aware handlers, which check for a watchpoint hit before
 * forwarding the access.  No access-size limits are set here.
 */
static const MemoryRegionOps watch_mem_ops = {
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2189
2190 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2191                                 unsigned len, MemTxAttrs attrs)
2192 {
2193     subpage_t *subpage = opaque;
2194     uint8_t buf[8];
2195     MemTxResult res;
2196
2197 #if defined(DEBUG_SUBPAGE)
2198     printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2199            subpage, len, addr);
2200 #endif
2201     res = address_space_read(subpage->as, addr + subpage->base,
2202                              attrs, buf, len);
2203     if (res) {
2204         return res;
2205     }
2206     switch (len) {
2207     case 1:
2208         *data = ldub_p(buf);
2209         return MEMTX_OK;
2210     case 2:
2211         *data = lduw_p(buf);
2212         return MEMTX_OK;
2213     case 4:
2214         *data = ldl_p(buf);
2215         return MEMTX_OK;
2216     case 8:
2217         *data = ldq_p(buf);
2218         return MEMTX_OK;
2219     default:
2220         abort();
2221     }
2222 }
2223
2224 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2225                                  uint64_t value, unsigned len, MemTxAttrs attrs)
2226 {
2227     subpage_t *subpage = opaque;
2228     uint8_t buf[8];
2229
2230 #if defined(DEBUG_SUBPAGE)
2231     printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2232            " value %"PRIx64"\n",
2233            __func__, subpage, len, addr, value);
2234 #endif
2235     switch (len) {
2236     case 1:
2237         stb_p(buf, value);
2238         break;
2239     case 2:
2240         stw_p(buf, value);
2241         break;
2242     case 4:
2243         stl_p(buf, value);
2244         break;
2245     case 8:
2246         stq_p(buf, value);
2247         break;
2248     default:
2249         abort();
2250     }
2251     return address_space_write(subpage->as, addr + subpage->base,
2252                                attrs, buf, len);
2253 }
2254
2255 static bool subpage_accepts(void *opaque, hwaddr addr,
2256                             unsigned len, bool is_write)
2257 {
2258     subpage_t *subpage = opaque;
2259 #if defined(DEBUG_SUBPAGE)
2260     printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2261            __func__, subpage, is_write ? 'w' : 'r', len, addr);
2262 #endif
2263
2264     return address_space_access_valid(subpage->as, addr + subpage->base,
2265                                       len, is_write);
2266 }
2267
2268 static const MemoryRegionOps subpage_ops = {
2269     .read_with_attrs = subpage_read,
2270     .write_with_attrs = subpage_write,
2271     .impl.min_access_size = 1,
2272     .impl.max_access_size = 8,
2273     .valid.min_access_size = 1,
2274     .valid.max_access_size = 8,
2275     .valid.accepts = subpage_accepts,
2276     .endianness = DEVICE_NATIVE_ENDIAN,
2277 };
2278
2279 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2280                              uint16_t section)
2281 {
2282     int idx, eidx;
2283
2284     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2285         return -1;
2286     idx = SUBPAGE_IDX(start);
2287     eidx = SUBPAGE_IDX(end);
2288 #if defined(DEBUG_SUBPAGE)
2289     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2290            __func__, mmio, start, end, idx, eidx, section);
2291 #endif
2292     for (; idx <= eidx; idx++) {
2293         mmio->sub_section[idx] = section;
2294     }
2295
2296     return 0;
2297 }
2298
2299 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2300 {
2301     subpage_t *mmio;
2302
2303     mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2304     mmio->as = as;
2305     mmio->base = base;
2306     memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2307                           NULL, TARGET_PAGE_SIZE);
2308     mmio->iomem.subpage = true;
2309 #if defined(DEBUG_SUBPAGE)
2310     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2311            mmio, base, TARGET_PAGE_SIZE);
2312 #endif
2313     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2314
2315     return mmio;
2316 }
2317
2318 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2319                               MemoryRegion *mr)
2320 {
2321     assert(as);
2322     MemoryRegionSection section = {
2323         .address_space = as,
2324         .mr = mr,
2325         .offset_within_address_space = 0,
2326         .offset_within_region = 0,
2327         .size = int128_2_64(),
2328     };
2329
2330     return phys_section_add(map, &section);
2331 }
2332
2333 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2334 {
2335     int asidx = cpu_asidx_from_attrs(cpu, attrs);
2336     CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2337     AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2338     MemoryRegionSection *sections = d->map.sections;
2339
2340     return sections[index & ~TARGET_PAGE_MASK].mr;
2341 }
2342
2343 static void io_mem_init(void)
2344 {
2345     memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2346     memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2347                           NULL, UINT64_MAX);
2348     memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2349                           NULL, UINT64_MAX);
2350     memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2351                           NULL, UINT64_MAX);
2352 }
2353
2354 static void mem_begin(MemoryListener *listener)
2355 {
2356     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2357     AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2358     uint16_t n;
2359
2360     n = dummy_section(&d->map, as, &io_mem_unassigned);
2361     assert(n == PHYS_SECTION_UNASSIGNED);
2362     n = dummy_section(&d->map, as, &io_mem_notdirty);
2363     assert(n == PHYS_SECTION_NOTDIRTY);
2364     n = dummy_section(&d->map, as, &io_mem_rom);
2365     assert(n == PHYS_SECTION_ROM);
2366     n = dummy_section(&d->map, as, &io_mem_watch);
2367     assert(n == PHYS_SECTION_WATCH);
2368
2369     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2370     d->as = as;
2371     as->next_dispatch = d;
2372 }
2373
/*
 * Release a dispatch table: free its section map, then the table itself.
 * Within this file it is only invoked through call_rcu().
 */
static void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d);
}
2379
2380 static void mem_commit(MemoryListener *listener)
2381 {
2382     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2383     AddressSpaceDispatch *cur = as->dispatch;
2384     AddressSpaceDispatch *next = as->next_dispatch;
2385
2386     phys_page_compact_all(next, next->map.nodes_nb);
2387
2388     atomic_rcu_set(&as->dispatch, next);
2389     if (cur) {
2390         call_rcu(cur, address_space_dispatch_free, rcu);
2391     }
2392 }
2393
2394 static void tcg_commit(MemoryListener *listener)
2395 {
2396     CPUAddressSpace *cpuas;
2397     AddressSpaceDispatch *d;
2398
2399     /* since each CPU stores ram addresses in its TLB cache, we must
2400        reset the modified entries */
2401     cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2402     cpu_reloading_memory_map();
2403     /* The CPU and TLB are protected by the iothread lock.
2404      * We reload the dispatch pointer now because cpu_reloading_memory_map()
2405      * may have split the RCU critical section.
2406      */
2407     d = atomic_rcu_read(&cpuas->as->dispatch);
2408     atomic_rcu_set(&cpuas->memory_dispatch, d);
2409     tlb_flush(cpuas->cpu, 1);
2410 }
2411
2412 void address_space_init_dispatch(AddressSpace *as)
2413 {
2414     as->dispatch = NULL;
2415     as->dispatch_listener = (MemoryListener) {
2416         .begin = mem_begin,
2417         .commit = mem_commit,
2418         .region_add = mem_add,
2419         .region_nop = mem_add,
2420         .priority = 0,
2421     };
2422     memory_listener_register(&as->dispatch_listener, as);
2423 }
2424
/*
 * Unregister the dispatch listener that address_space_init_dispatch()
 * registered for @as.  The dispatch table itself is not touched here.
 */
void address_space_unregister(AddressSpace *as)
{
    memory_listener_unregister(&as->dispatch_listener);
}
2429
2430 void address_space_destroy_dispatch(AddressSpace *as)
2431 {
2432     AddressSpaceDispatch *d = as->dispatch;
2433
2434     atomic_rcu_set(&as->dispatch, NULL);
2435     if (d) {
2436         call_rcu(d, address_space_dispatch_free, rcu);
2437     }
2438 }
2439
2440 static void memory_map_init(void)
2441 {
2442     system_memory = g_malloc(sizeof(*system_memory));
2443
2444     memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2445     address_space_init(&address_space_memory, system_memory, "memory");
2446
2447     system_io = g_malloc(sizeof(*system_io));
2448     memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2449                           65536);
2450     address_space_init(&address_space_io, system_io, "I/O");
2451 }
2452
/*
 * Return the root "system" memory region allocated by memory_map_init()
 * (whatever system_memory currently holds).
 */
MemoryRegion *get_system_memory(void)
{
    return system_memory;
}
2457
/*
 * Return the root "io" region allocated by memory_map_init()
 * (whatever system_io currently holds).
 */
MemoryRegion *get_system_io(void)
{
    return system_io;
}
2462
2463 #endif /* !defined(CONFIG_USER_ONLY) */
2464
2465 /* physical memory access (slow version, mainly for debug) */
2466 #if defined(CONFIG_USER_ONLY)
2467 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2468                         uint8_t *buf, int len, int is_write)
2469 {
2470     int l, flags;
2471     target_ulong page;
2472     void * p;
2473
2474     while (len > 0) {
2475         page = addr & TARGET_PAGE_MASK;
2476         l = (page + TARGET_PAGE_SIZE) - addr;
2477         if (l > len)
2478             l = len;
2479         flags = page_get_flags(page);
2480         if (!(flags & PAGE_VALID))
2481             return -1;
2482         if (is_write) {
2483             if (!(flags & PAGE_WRITE))
2484                 return -1;
2485             /* XXX: this code should not depend on lock_user */
2486             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2487                 return -1;
2488             memcpy(p, buf, l);
2489             unlock_user(p, addr, l);
2490         } else {
2491             if (!(flags & PAGE_READ))
2492                 return -1;
2493             /* XXX: this code should not depend on lock_user */
2494             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2495                 return -1;
2496             memcpy(buf, p, l);
2497             unlock_user(p, addr, 0);
2498         }
2499         len -= l;
2500         buf += l;
2501         addr += l;
2502     }
2503     return 0;
2504 }
2505
2506 #else
2507
2508 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2509                                      hwaddr length)
2510 {
2511     uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2512     addr += memory_region_get_ram_addr(mr);
2513
2514     /* No early return if dirty_log_mask is or becomes 0, because
2515      * cpu_physical_memory_set_dirty_range will still call
2516      * xen_modified_memory.
2517      */
2518     if (dirty_log_mask) {
2519         dirty_log_mask =
2520             cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2521     }
2522     if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2523         tb_lock();
2524         tb_invalidate_phys_range(addr, addr + length);
2525         tb_unlock();
2526         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2527     }
2528     cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2529 }
2530
2531 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2532 {
2533     unsigned access_size_max = mr->ops->valid.max_access_size;
2534
2535     /* Regions are assumed to support 1-4 byte accesses unless
2536        otherwise specified.  */
2537     if (access_size_max == 0) {
2538         access_size_max = 4;
2539     }
2540
2541     /* Bound the maximum access by the alignment of the address.  */
2542     if (!mr->ops->impl.unaligned) {
2543         unsigned align_size_max = addr & -addr;
2544         if (align_size_max != 0 && align_size_max < access_size_max) {
2545             access_size_max = align_size_max;
2546         }
2547     }
2548
2549     /* Don't attempt accesses larger than the maximum.  */
2550     if (l > access_size_max) {
2551         l = access_size_max;
2552     }
2553     l = pow2floor(l);
2554
2555     return l;
2556 }
2557
2558 static bool prepare_mmio_access(MemoryRegion *mr)
2559 {
2560     bool unlocked = !qemu_mutex_iothread_locked();
2561     bool release_lock = false;
2562
2563     if (unlocked && mr->global_locking) {
2564         qemu_mutex_lock_iothread();
2565         unlocked = false;
2566         release_lock = true;
2567     }
2568     if (mr->flush_coalesced_mmio) {
2569         if (unlocked) {
2570             qemu_mutex_lock_iothread();
2571         }
2572         qemu_flush_coalesced_mmio_buffer();
2573         if (unlocked) {
2574             qemu_mutex_unlock_iothread();
2575         }
2576     }
2577
2578     return release_lock;
2579 }
2580
2581 /* Called within RCU critical section.  */
2582 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2583                                                 MemTxAttrs attrs,
2584                                                 const uint8_t *buf,
2585                                                 int len, hwaddr addr1,
2586                                                 hwaddr l, MemoryRegion *mr)
2587 {
2588     uint8_t *ptr;
2589     uint64_t val;
2590     MemTxResult result = MEMTX_OK;
2591     bool release_lock = false;
2592
2593     for (;;) {
2594         if (!memory_access_is_direct(mr, true)) {
2595             release_lock |= prepare_mmio_access(mr);
2596             l = memory_access_size(mr, l, addr1);
2597             /* XXX: could force current_cpu to NULL to avoid
2598                potential bugs */
2599             switch (l) {
2600             case 8:
2601                 /* 64 bit write access */
2602                 val = ldq_p(buf);
2603                 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2604                                                        attrs);
2605                 break;
2606             case 4:
2607                 /* 32 bit write access */
2608                 val = ldl_p(buf);
2609                 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2610                                                        attrs);
2611                 break;
2612             case 2:
2613                 /* 16 bit write access */
2614                 val = lduw_p(buf);
2615                 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2616                                                        attrs);
2617                 break;
2618             case 1:
2619                 /* 8 bit write access */
2620                 val = ldub_p(buf);
2621                 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2622                                                        attrs);
2623                 break;
2624             default:
2625                 abort();
2626             }
2627         } else {
2628             /* RAM case */
2629             ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2630             memcpy(ptr, buf, l);
2631             invalidate_and_set_dirty(mr, addr1, l);
2632         }
2633
2634         if (release_lock) {
2635             qemu_mutex_unlock_iothread();
2636             release_lock = false;
2637         }
2638
2639         len -= l;
2640         buf += l;
2641         addr += l;
2642
2643         if (!len) {
2644             break;
2645         }
2646
2647         l = len;
2648         mr = address_space_translate(as, addr, &addr1, &l, true);
2649     }
2650
2651     return result;
2652 }
2653
2654 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2655                                 const uint8_t *buf, int len)
2656 {
2657     hwaddr l;
2658     hwaddr addr1;
2659     MemoryRegion *mr;
2660     MemTxResult result = MEMTX_OK;
2661
2662     if (len > 0) {
2663         rcu_read_lock();
2664         l = len;
2665         mr = address_space_translate(as, addr, &addr1, &l, true);
2666         result = address_space_write_continue(as, addr, attrs, buf, len,
2667                                               addr1, l, mr);
2668         rcu_read_unlock();
2669     }
2670
2671     return result;
2672 }
2673
2674 /* Called within RCU critical section.  */
2675 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2676                                         MemTxAttrs attrs, uint8_t *buf,
2677                                         int len, hwaddr addr1, hwaddr l,
2678                                         MemoryRegion *mr)
2679 {
2680     uint8_t *ptr;
2681     uint64_t val;
2682     MemTxResult result = MEMTX_OK;
2683     bool release_lock = false;
2684
2685     for (;;) {
2686         if (!memory_access_is_direct(mr, false)) {
2687             /* I/O case */
2688             release_lock |= prepare_mmio_access(mr);
2689             l = memory_access_size(mr, l, addr1);
2690             switch (l) {
2691             case 8:
2692                 /* 64 bit read access */
2693                 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2694                                                       attrs);
2695                 stq_p(buf, val);
2696                 break;
2697             case 4:
2698                 /* 32 bit read access */
2699                 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2700                                                       attrs);
2701                 stl_p(buf, val);
2702                 break;
2703             case 2:
2704                 /* 16 bit read access */
2705                 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2706                                                       attrs);
2707                 stw_p(buf, val);
2708                 break;
2709             case 1:
2710                 /* 8 bit read access */
2711                 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2712                                                       attrs);
2713                 stb_p(buf, val);
2714                 break;
2715             default:
2716                 abort();
2717             }
2718         } else {
2719             /* RAM case */
2720             ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2721             memcpy(buf, ptr, l);
2722         }
2723
2724         if (release_lock) {
2725             qemu_mutex_unlock_iothread();
2726             release_lock = false;
2727         }
2728
2729         len -= l;
2730         buf += l;
2731         addr += l;
2732
2733         if (!len) {
2734             break;
2735         }
2736
2737         l = len;
2738         mr = address_space_translate(as, addr, &addr1, &l, false);
2739     }
2740
2741     return result;
2742 }
2743
2744 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2745                                     MemTxAttrs attrs, uint8_t *buf, int len)
2746 {
2747     hwaddr l;
2748     hwaddr addr1;
2749     MemoryRegion *mr;
2750     MemTxResult result = MEMTX_OK;
2751
2752     if (len > 0) {
2753         rcu_read_lock();
2754         l = len;
2755         mr = address_space_translate(as, addr, &addr1, &l, false);
2756         result = address_space_read_continue(as, addr, attrs, buf, len,
2757                                              addr1, l, mr);
2758         rcu_read_unlock();
2759     }
2760
2761     return result;
2762 }
2763
2764 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2765                              uint8_t *buf, int len, bool is_write)
2766 {
2767     if (is_write) {
2768         return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2769     } else {
2770         return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2771     }
2772 }
2773
/*
 * Convenience wrapper: read or write @len bytes at physical address @addr
 * in address_space_memory using MEMTXATTRS_UNSPECIFIED.  The MemTxResult
 * of the access is discarded.
 */
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}
2780
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM and mark it dirty */
    FLUSH_CACHE,    /* flush the host icache for the range, copy nothing */
};
2785
2786 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2787     hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2788 {
2789     hwaddr l;
2790     uint8_t *ptr;
2791     hwaddr addr1;
2792     MemoryRegion *mr;
2793
2794     rcu_read_lock();
2795     while (len > 0) {
2796         l = len;
2797         mr = address_space_translate(as, addr, &addr1, &l, true);
2798
2799         if (!(memory_region_is_ram(mr) ||
2800               memory_region_is_romd(mr))) {
2801             l = memory_access_size(mr, l, addr1);
2802         } else {
2803             /* ROM/RAM case */
2804             ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2805             switch (type) {
2806             case WRITE_DATA:
2807                 memcpy(ptr, buf, l);
2808                 invalidate_and_set_dirty(mr, addr1, l);
2809                 break;
2810             case FLUSH_CACHE:
2811                 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2812                 break;
2813             }
2814         }
2815         len -= l;
2816         buf += l;
2817         addr += l;
2818     }
2819     rcu_read_unlock();
2820 }
2821
2822 /* used for ROM loading : can write in RAM and ROM */
2823 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2824                                    const uint8_t *buf, int len)
2825 {
2826     cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2827 }
2828
2829 void cpu_flush_icache_range(hwaddr start, int len)
2830 {
2831     /*
2832      * This function should do the same thing as an icache flush that was
2833      * triggered from within the guest. For TCG we are always cache coherent,
2834      * so there is no need to flush anything. For KVM / Xen we need to flush
2835      * the host's instruction cache at least.
2836      */
2837     if (tcg_enabled()) {
2838         return;
2839     }
2840
2841     cpu_physical_memory_write_rom_internal(&address_space_memory,
2842                                            start, NULL, len, FLUSH_CACHE);
2843 }
2844
2845 typedef struct {
2846     MemoryRegion *mr;
2847     void *buffer;
2848     hwaddr addr;
2849     hwaddr len;
2850     bool in_use;
2851 } BounceBuffer;
2852
2853 static BounceBuffer bounce;
2854
2855 typedef struct MapClient {
2856     QEMUBH *bh;
2857     QLIST_ENTRY(MapClient) link;
2858 } MapClient;
2859
2860 QemuMutex map_client_list_lock;
2861 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2862     = QLIST_HEAD_INITIALIZER(map_client_list);
2863
2864 static void cpu_unregister_map_client_do(MapClient *client)
2865 {
2866     QLIST_REMOVE(client, link);
2867     g_free(client);
2868 }
2869
2870 static void cpu_notify_map_clients_locked(void)
2871 {
2872     MapClient *client;
2873
2874     while (!QLIST_EMPTY(&map_client_list)) {
2875         client = QLIST_FIRST(&map_client_list);
2876         qemu_bh_schedule(client->bh);
2877         cpu_unregister_map_client_do(client);
2878     }
2879 }
2880
2881 void cpu_register_map_client(QEMUBH *bh)
2882 {
2883     MapClient *client = g_malloc(sizeof(*client));
2884
2885     qemu_mutex_lock(&map_client_list_lock);
2886     client->bh = bh;
2887     QLIST_INSERT_HEAD(&map_client_list, client, link);
2888     if (!atomic_read(&bounce.in_use)) {
2889         cpu_notify_map_clients_locked();
2890     }
2891     qemu_mutex_unlock(&map_client_list_lock);
2892 }
2893
2894 void cpu_exec_init_all(void)
2895 {
2896     qemu_mutex_init(&ram_list.mutex);
2897     /* The data structures we set up here depend on knowing the page size,
2898      * so no more changes can be made after this point.
2899      * In an ideal world, nothing we did before we had finished the
2900      * machine setup would care about the target page size, and we could
2901      * do this much later, rather than requiring board models to state
2902      * up front what their requirements are.
2903      */
2904     finalize_target_page_bits();
2905     io_mem_init();
2906     memory_map_init();
2907     qemu_mutex_init(&map_client_list_lock);
2908 }
2909
2910 void cpu_unregister_map_client(QEMUBH *bh)
2911 {
2912     MapClient *client;
2913
2914     qemu_mutex_lock(&map_client_list_lock);
2915     QLIST_FOREACH(client, &map_client_list, link) {
2916         if (client->bh == bh) {
2917             cpu_unregister_map_client_do(client);
2918             break;
2919         }
2920     }
2921     qemu_mutex_unlock(&map_client_list_lock);
2922 }
2923
2924 static void cpu_notify_map_clients(void)
2925 {
2926     qemu_mutex_lock(&map_client_list_lock);
2927     cpu_notify_map_clients_locked();
2928     qemu_mutex_unlock(&map_client_list_lock);
2929 }
2930
2931 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2932 {
2933     MemoryRegion *mr;
2934     hwaddr l, xlat;
2935
2936     rcu_read_lock();
2937     while (len > 0) {
2938         l = len;
2939         mr = address_space_translate(as, addr, &xlat, &l, is_write);
2940         if (!memory_access_is_direct(mr, is_write)) {
2941             l = memory_access_size(mr, l, addr);
2942             if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2943                 return false;
2944             }
2945         }
2946
2947         len -= l;
2948         addr += l;
2949     }
2950     rcu_read_unlock();
2951     return true;
2952 }
2953
2954 /* Map a physical memory region into a host virtual address.
2955  * May map a subset of the requested range, given by and returned in *plen.
2956  * May return NULL if resources needed to perform the mapping are exhausted.
2957  * Use only for reads OR writes - not for read-modify-write operations.
2958  * Use cpu_register_map_client() to know when retrying the map operation is
2959  * likely to succeed.
2960  */
2961 void *address_space_map(AddressSpace *as,
2962                         hwaddr addr,
2963                         hwaddr *plen,
2964                         bool is_write)
2965 {
2966     hwaddr len = *plen;
2967     hwaddr done = 0;
2968     hwaddr l, xlat, base;
2969     MemoryRegion *mr, *this_mr;
2970     void *ptr;
2971
2972     if (len == 0) {
2973         return NULL;
2974     }
2975
2976     l = len;
2977     rcu_read_lock();
2978     mr = address_space_translate(as, addr, &xlat, &l, is_write);
2979
2980     if (!memory_access_is_direct(mr, is_write)) {
2981         if (atomic_xchg(&bounce.in_use, true)) {
2982             rcu_read_unlock();
2983             return NULL;
2984         }
2985         /* Avoid unbounded allocations */
2986         l = MIN(l, TARGET_PAGE_SIZE);
2987         bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2988         bounce.addr = addr;
2989         bounce.len = l;
2990
2991         memory_region_ref(mr);
2992         bounce.mr = mr;
2993         if (!is_write) {
2994             address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2995                                bounce.buffer, l);
2996         }
2997
2998         rcu_read_unlock();
2999         *plen = l;
3000         return bounce.buffer;
3001     }
3002
3003     base = xlat;
3004
3005     for (;;) {
3006         len -= l;
3007         addr += l;
3008         done += l;
3009         if (len == 0) {
3010             break;
3011         }
3012
3013         l = len;
3014         this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3015         if (this_mr != mr || xlat != base + done) {
3016             break;
3017         }
3018     }
3019
3020     memory_region_ref(mr);
3021     *plen = done;
3022     ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
3023     rcu_read_unlock();
3024
3025     return ptr;
3026 }
3027
3028 /* Unmaps a memory region previously mapped by address_space_map().
3029  * Will also mark the memory as dirty if is_write == 1.  access_len gives
3030  * the amount of memory that was actually read or written by the caller.
3031  */
3032 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3033                          int is_write, hwaddr access_len)
3034 {
3035     if (buffer != bounce.buffer) {
3036         MemoryRegion *mr;
3037         ram_addr_t addr1;
3038
3039         mr = memory_region_from_host(buffer, &addr1);
3040         assert(mr != NULL);
3041         if (is_write) {
3042             invalidate_and_set_dirty(mr, addr1, access_len);
3043         }
3044         if (xen_enabled()) {
3045             xen_invalidate_map_cache_entry(buffer);
3046         }
3047         memory_region_unref(mr);
3048         return;
3049     }
3050     if (is_write) {
3051         address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3052                             bounce.buffer, access_len);
3053     }
3054     qemu_vfree(bounce.buffer);
3055     bounce.buffer = NULL;
3056     memory_region_unref(bounce.mr);
3057     atomic_mb_set(&bounce.in_use, false);
3058     cpu_notify_map_clients();
3059 }
3060
3061 void *cpu_physical_memory_map(hwaddr addr,
3062                               hwaddr *plen,
3063                               int is_write)
3064 {
3065     return address_space_map(&address_space_memory, addr, plen, is_write);
3066 }
3067
3068 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3069                                int is_write, hwaddr access_len)
3070 {
3071     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3072 }
3073
3074 /* warning: addr must be aligned */
3075 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3076                                                   MemTxAttrs attrs,
3077                                                   MemTxResult *result,
3078                                                   enum device_endian endian)
3079 {
3080     uint8_t *ptr;
3081     uint64_t val;
3082     MemoryRegion *mr;
3083     hwaddr l = 4;
3084     hwaddr addr1;
3085     MemTxResult r;
3086     bool release_lock = false;
3087
3088     rcu_read_lock();
3089     mr = address_space_translate(as, addr, &addr1, &l, false);
3090     if (l < 4 || !memory_access_is_direct(mr, false)) {
3091         release_lock |= prepare_mmio_access(mr);
3092
3093         /* I/O case */
3094         r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3095 #if defined(TARGET_WORDS_BIGENDIAN)
3096         if (endian == DEVICE_LITTLE_ENDIAN) {
3097             val = bswap32(val);
3098         }
3099 #else
3100         if (endian == DEVICE_BIG_ENDIAN) {
3101             val = bswap32(val);
3102         }
3103 #endif
3104     } else {
3105         /* RAM case */
3106         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3107         switch (endian) {
3108         case DEVICE_LITTLE_ENDIAN:
3109             val = ldl_le_p(ptr);
3110             break;
3111         case DEVICE_BIG_ENDIAN:
3112             val = ldl_be_p(ptr);
3113             break;
3114         default:
3115             val = ldl_p(ptr);
3116             break;
3117         }
3118         r = MEMTX_OK;
3119     }
3120     if (result) {
3121         *result = r;
3122     }
3123     if (release_lock) {
3124         qemu_mutex_unlock_iothread();
3125     }
3126     rcu_read_unlock();
3127     return val;
3128 }
3129
3130 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3131                            MemTxAttrs attrs, MemTxResult *result)
3132 {
3133     return address_space_ldl_internal(as, addr, attrs, result,
3134                                       DEVICE_NATIVE_ENDIAN);
3135 }
3136
3137 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3138                               MemTxAttrs attrs, MemTxResult *result)
3139 {
3140     return address_space_ldl_internal(as, addr, attrs, result,
3141                                       DEVICE_LITTLE_ENDIAN);
3142 }
3143
3144 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3145                               MemTxAttrs attrs, MemTxResult *result)
3146 {
3147     return address_space_ldl_internal(as, addr, attrs, result,
3148                                       DEVICE_BIG_ENDIAN);
3149 }
3150
3151 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3152 {
3153     return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3154 }
3155
3156 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3157 {
3158     return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3159 }
3160
3161 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3162 {
3163     return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3164 }
3165
3166 /* warning: addr must be aligned */
3167 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3168                                                   MemTxAttrs attrs,
3169                                                   MemTxResult *result,
3170                                                   enum device_endian endian)
3171 {
3172     uint8_t *ptr;
3173     uint64_t val;
3174     MemoryRegion *mr;
3175     hwaddr l = 8;
3176     hwaddr addr1;
3177     MemTxResult r;
3178     bool release_lock = false;
3179
3180     rcu_read_lock();
3181     mr = address_space_translate(as, addr, &addr1, &l,
3182                                  false);
3183     if (l < 8 || !memory_access_is_direct(mr, false)) {
3184         release_lock |= prepare_mmio_access(mr);
3185
3186         /* I/O case */
3187         r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3188 #if defined(TARGET_WORDS_BIGENDIAN)
3189         if (endian == DEVICE_LITTLE_ENDIAN) {
3190             val = bswap64(val);
3191         }
3192 #else
3193         if (endian == DEVICE_BIG_ENDIAN) {
3194             val = bswap64(val);
3195         }
3196 #endif
3197     } else {
3198         /* RAM case */
3199         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3200         switch (endian) {
3201         case DEVICE_LITTLE_ENDIAN:
3202             val = ldq_le_p(ptr);
3203             break;
3204         case DEVICE_BIG_ENDIAN:
3205             val = ldq_be_p(ptr);
3206             break;
3207         default:
3208             val = ldq_p(ptr);
3209             break;
3210         }
3211         r = MEMTX_OK;
3212     }
3213     if (result) {
3214         *result = r;
3215     }
3216     if (release_lock) {
3217         qemu_mutex_unlock_iothread();
3218     }
3219     rcu_read_unlock();
3220     return val;
3221 }
3222
3223 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3224                            MemTxAttrs attrs, MemTxResult *result)
3225 {
3226     return address_space_ldq_internal(as, addr, attrs, result,
3227                                       DEVICE_NATIVE_ENDIAN);
3228 }
3229
3230 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3231                            MemTxAttrs attrs, MemTxResult *result)
3232 {
3233     return address_space_ldq_internal(as, addr, attrs, result,
3234                                       DEVICE_LITTLE_ENDIAN);
3235 }
3236
3237 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3238                            MemTxAttrs attrs, MemTxResult *result)
3239 {
3240     return address_space_ldq_internal(as, addr, attrs, result,
3241                                       DEVICE_BIG_ENDIAN);
3242 }
3243
3244 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3245 {
3246     return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3247 }
3248
3249 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3250 {
3251     return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3252 }
3253
3254 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3255 {
3256     return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3257 }
3258
3259 /* XXX: optimize */
3260 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3261                             MemTxAttrs attrs, MemTxResult *result)
3262 {
3263     uint8_t val;
3264     MemTxResult r;
3265
3266     r = address_space_rw(as, addr, attrs, &val, 1, 0);
3267     if (result) {
3268         *result = r;
3269     }
3270     return val;
3271 }
3272
3273 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3274 {
3275     return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3276 }
3277
3278 /* warning: addr must be aligned */
3279 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3280                                                    hwaddr addr,
3281                                                    MemTxAttrs attrs,
3282                                                    MemTxResult *result,
3283                                                    enum device_endian endian)
3284 {
3285     uint8_t *ptr;
3286     uint64_t val;
3287     MemoryRegion *mr;
3288     hwaddr l = 2;
3289     hwaddr addr1;
3290     MemTxResult r;
3291     bool release_lock = false;
3292
3293     rcu_read_lock();
3294     mr = address_space_translate(as, addr, &addr1, &l,
3295                                  false);
3296     if (l < 2 || !memory_access_is_direct(mr, false)) {
3297         release_lock |= prepare_mmio_access(mr);
3298
3299         /* I/O case */
3300         r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3301 #if defined(TARGET_WORDS_BIGENDIAN)
3302         if (endian == DEVICE_LITTLE_ENDIAN) {
3303             val = bswap16(val);
3304         }
3305 #else
3306         if (endian == DEVICE_BIG_ENDIAN) {
3307             val = bswap16(val);
3308         }
3309 #endif
3310     } else {
3311         /* RAM case */
3312         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3313         switch (endian) {
3314         case DEVICE_LITTLE_ENDIAN:
3315             val = lduw_le_p(ptr);
3316             break;
3317         case DEVICE_BIG_ENDIAN:
3318             val = lduw_be_p(ptr);
3319             break;
3320         default:
3321             val = lduw_p(ptr);
3322             break;
3323         }
3324         r = MEMTX_OK;
3325     }
3326     if (result) {
3327         *result = r;
3328     }
3329     if (release_lock) {
3330         qemu_mutex_unlock_iothread();
3331     }
3332     rcu_read_unlock();
3333     return val;
3334 }
3335
3336 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3337                            MemTxAttrs attrs, MemTxResult *result)
3338 {
3339     return address_space_lduw_internal(as, addr, attrs, result,
3340                                        DEVICE_NATIVE_ENDIAN);
3341 }
3342
3343 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3344                            MemTxAttrs attrs, MemTxResult *result)
3345 {
3346     return address_space_lduw_internal(as, addr, attrs, result,
3347                                        DEVICE_LITTLE_ENDIAN);
3348 }
3349
3350 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3351                            MemTxAttrs attrs, MemTxResult *result)
3352 {
3353     return address_space_lduw_internal(as, addr, attrs, result,
3354                                        DEVICE_BIG_ENDIAN);
3355 }
3356
3357 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3358 {
3359     return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3360 }
3361
3362 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3363 {
3364     return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3365 }
3366
3367 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3368 {
3369     return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3370 }
3371
3372 /* warning: addr must be aligned. The ram page is not masked as dirty
3373    and the code inside is not invalidated. It is useful if the dirty
3374    bits are used to track modified PTEs */
3375 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3376                                 MemTxAttrs attrs, MemTxResult *result)
3377 {
3378     uint8_t *ptr;
3379     MemoryRegion *mr;
3380     hwaddr l = 4;
3381     hwaddr addr1;
3382     MemTxResult r;
3383     uint8_t dirty_log_mask;
3384     bool release_lock = false;
3385
3386     rcu_read_lock();
3387     mr = address_space_translate(as, addr, &addr1, &l,
3388                                  true);
3389     if (l < 4 || !memory_access_is_direct(mr, true)) {
3390         release_lock |= prepare_mmio_access(mr);
3391
3392         r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3393     } else {
3394         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3395         stl_p(ptr, val);
3396
3397         dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3398         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3399         cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3400                                             4, dirty_log_mask);
3401         r = MEMTX_OK;
3402     }
3403     if (result) {
3404         *result = r;
3405     }
3406     if (release_lock) {
3407         qemu_mutex_unlock_iothread();
3408     }
3409     rcu_read_unlock();
3410 }
3411
3412 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3413 {
3414     address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3415 }
3416
3417 /* warning: addr must be aligned */
3418 static inline void address_space_stl_internal(AddressSpace *as,
3419                                               hwaddr addr, uint32_t val,
3420                                               MemTxAttrs attrs,
3421                                               MemTxResult *result,
3422                                               enum device_endian endian)
3423 {
3424     uint8_t *ptr;
3425     MemoryRegion *mr;
3426     hwaddr l = 4;
3427     hwaddr addr1;
3428     MemTxResult r;
3429     bool release_lock = false;
3430
3431     rcu_read_lock();
3432     mr = address_space_translate(as, addr, &addr1, &l,
3433                                  true);
3434     if (l < 4 || !memory_access_is_direct(mr, true)) {
3435         release_lock |= prepare_mmio_access(mr);
3436
3437 #if defined(TARGET_WORDS_BIGENDIAN)
3438         if (endian == DEVICE_LITTLE_ENDIAN) {
3439             val = bswap32(val);
3440         }
3441 #else
3442         if (endian == DEVICE_BIG_ENDIAN) {
3443             val = bswap32(val);
3444         }
3445 #endif
3446         r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3447     } else {
3448         /* RAM case */
3449         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3450         switch (endian) {
3451         case DEVICE_LITTLE_ENDIAN:
3452             stl_le_p(ptr, val);
3453             break;
3454         case DEVICE_BIG_ENDIAN:
3455             stl_be_p(ptr, val);
3456             break;
3457         default:
3458             stl_p(ptr, val);
3459             break;
3460         }
3461         invalidate_and_set_dirty(mr, addr1, 4);
3462         r = MEMTX_OK;
3463     }
3464     if (result) {
3465         *result = r;
3466     }
3467     if (release_lock) {
3468         qemu_mutex_unlock_iothread();
3469     }
3470     rcu_read_unlock();
3471 }
3472
3473 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3474                        MemTxAttrs attrs, MemTxResult *result)
3475 {
3476     address_space_stl_internal(as, addr, val, attrs, result,
3477                                DEVICE_NATIVE_ENDIAN);
3478 }
3479
3480 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3481                        MemTxAttrs attrs, MemTxResult *result)
3482 {
3483     address_space_stl_internal(as, addr, val, attrs, result,
3484                                DEVICE_LITTLE_ENDIAN);
3485 }
3486
3487 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3488                        MemTxAttrs attrs, MemTxResult *result)
3489 {
3490     address_space_stl_internal(as, addr, val, attrs, result,
3491                                DEVICE_BIG_ENDIAN);
3492 }
3493
3494 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3495 {
3496     address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3497 }
3498
3499 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3500 {
3501     address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3502 }
3503
3504 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3505 {
3506     address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3507 }
3508
3509 /* XXX: optimize */
3510 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3511                        MemTxAttrs attrs, MemTxResult *result)
3512 {
3513     uint8_t v = val;
3514     MemTxResult r;
3515
3516     r = address_space_rw(as, addr, attrs, &v, 1, 1);
3517     if (result) {
3518         *result = r;
3519     }
3520 }
3521
3522 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3523 {
3524     address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3525 }
3526
3527 /* warning: addr must be aligned */
3528 static inline void address_space_stw_internal(AddressSpace *as,
3529                                               hwaddr addr, uint32_t val,
3530                                               MemTxAttrs attrs,
3531                                               MemTxResult *result,
3532                                               enum device_endian endian)
3533 {
3534     uint8_t *ptr;
3535     MemoryRegion *mr;
3536     hwaddr l = 2;
3537     hwaddr addr1;
3538     MemTxResult r;
3539     bool release_lock = false;
3540
3541     rcu_read_lock();
3542     mr = address_space_translate(as, addr, &addr1, &l, true);
3543     if (l < 2 || !memory_access_is_direct(mr, true)) {
3544         release_lock |= prepare_mmio_access(mr);
3545
3546 #if defined(TARGET_WORDS_BIGENDIAN)
3547         if (endian == DEVICE_LITTLE_ENDIAN) {
3548             val = bswap16(val);
3549         }
3550 #else
3551         if (endian == DEVICE_BIG_ENDIAN) {
3552             val = bswap16(val);
3553         }
3554 #endif
3555         r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3556     } else {
3557         /* RAM case */
3558         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3559         switch (endian) {
3560         case DEVICE_LITTLE_ENDIAN:
3561             stw_le_p(ptr, val);
3562             break;
3563         case DEVICE_BIG_ENDIAN:
3564             stw_be_p(ptr, val);
3565             break;
3566         default:
3567             stw_p(ptr, val);
3568             break;
3569         }
3570         invalidate_and_set_dirty(mr, addr1, 2);
3571         r = MEMTX_OK;
3572     }
3573     if (result) {
3574         *result = r;
3575     }
3576     if (release_lock) {
3577         qemu_mutex_unlock_iothread();
3578     }
3579     rcu_read_unlock();
3580 }
3581
3582 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3583                        MemTxAttrs attrs, MemTxResult *result)
3584 {
3585     address_space_stw_internal(as, addr, val, attrs, result,
3586                                DEVICE_NATIVE_ENDIAN);
3587 }
3588
3589 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3590                        MemTxAttrs attrs, MemTxResult *result)
3591 {
3592     address_space_stw_internal(as, addr, val, attrs, result,
3593                                DEVICE_LITTLE_ENDIAN);
3594 }
3595
3596 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3597                        MemTxAttrs attrs, MemTxResult *result)
3598 {
3599     address_space_stw_internal(as, addr, val, attrs, result,
3600                                DEVICE_BIG_ENDIAN);
3601 }
3602
3603 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3604 {
3605     address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3606 }
3607
3608 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3609 {
3610     address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3611 }
3612
3613 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3614 {
3615     address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3616 }
3617
3618 /* XXX: optimize */
3619 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3620                        MemTxAttrs attrs, MemTxResult *result)
3621 {
3622     MemTxResult r;
3623     val = tswap64(val);
3624     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3625     if (result) {
3626         *result = r;
3627     }
3628 }
3629
3630 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3631                        MemTxAttrs attrs, MemTxResult *result)
3632 {
3633     MemTxResult r;
3634     val = cpu_to_le64(val);
3635     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3636     if (result) {
3637         *result = r;
3638     }
3639 }
3640 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3641                        MemTxAttrs attrs, MemTxResult *result)
3642 {
3643     MemTxResult r;
3644     val = cpu_to_be64(val);
3645     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3646     if (result) {
3647         *result = r;
3648     }
3649 }
3650
3651 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3652 {
3653     address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3654 }
3655
3656 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3657 {
3658     address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3659 }
3660
3661 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3662 {
3663     address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3664 }
3665
3666 /* virtual memory access for debug (includes writing to ROM) */
3667 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3668                         uint8_t *buf, int len, int is_write)
3669 {
3670     int l;
3671     hwaddr phys_addr;
3672     target_ulong page;
3673
3674     while (len > 0) {
3675         int asidx;
3676         MemTxAttrs attrs;
3677
3678         page = addr & TARGET_PAGE_MASK;
3679         phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3680         asidx = cpu_asidx_from_attrs(cpu, attrs);
3681         /* if no physical page mapped, return an error */
3682         if (phys_addr == -1)
3683             return -1;
3684         l = (page + TARGET_PAGE_SIZE) - addr;
3685         if (l > len)
3686             l = len;
3687         phys_addr += (addr & ~TARGET_PAGE_MASK);
3688         if (is_write) {
3689             cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3690                                           phys_addr, buf, l);
3691         } else {
3692             address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3693                              MEMTXATTRS_UNSPECIFIED,
3694                              buf, l, 0);
3695         }
3696         len -= l;
3697         buf += l;
3698         addr += l;
3699     }
3700     return 0;
3701 }
3702
3703 /*
3704  * Allows code that needs to deal with migration bitmaps etc to still be built
3705  * target independent.
3706  */
3707 size_t qemu_target_page_bits(void)
3708 {
3709     return TARGET_PAGE_BITS;
3710 }
3711
3712 #endif
3713
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined, false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
    bool big_endian;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#else
    big_endian = false;
#endif
    return big_endian;
}
3727
3728 #ifndef CONFIG_USER_ONLY
3729 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3730 {
3731     MemoryRegion*mr;
3732     hwaddr l = 1;
3733     bool res;
3734
3735     rcu_read_lock();
3736     mr = address_space_translate(&address_space_memory,
3737                                  phys_addr, &phys_addr, &l, false);
3738
3739     res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3740     rcu_read_unlock();
3741     return res;
3742 }
3743
3744 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3745 {
3746     RAMBlock *block;
3747     int ret = 0;
3748
3749     rcu_read_lock();
3750     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3751         ret = func(block->idstr, block->host, block->offset,
3752                    block->used_length, opaque);
3753         if (ret) {
3754             break;
3755         }
3756     }
3757     rcu_read_unlock();
3758     return ret;
3759 }
3760 #endif