exec.c

   1 /*
   2  *  Virtual page mapping
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20 #include "qapi/error.h"
  21 #ifndef _WIN32
  22 #endif
  23
  24 #include "qemu/cutils.h"
  25 #include "cpu.h"
  26 #include "exec/exec-all.h"
  27 #include "tcg.h"
  28 #include "hw/qdev-core.h"
  29 #if !defined(CONFIG_USER_ONLY)
  30 #include "hw/boards.h"
  31 #include "hw/xen/xen.h"
  32 #endif
  33 #include "sysemu/kvm.h"
  34 #include "sysemu/sysemu.h"
  35 #include "qemu/timer.h"
  36 #include "qemu/config-file.h"
  37 #include "qemu/error-report.h"
  38 #if defined(CONFIG_USER_ONLY)
  39 #include "qemu.h"
  40 #else /* !CONFIG_USER_ONLY */
  41 #include "hw/hw.h"
  42 #include "exec/memory.h"
  43 #include "exec/ioport.h"
  44 #include "sysemu/dma.h"
  45 #include "exec/address-spaces.h"
  46 #include "sysemu/xen-mapcache.h"
  47 #include "trace.h"
  48 #endif
  49 #include "exec/cpu-all.h"
  50 #include "qemu/rcu_queue.h"
  51 #include "qemu/main-loop.h"
  52 #include "translate-all.h"
  53 #include "sysemu/replay.h"
  54
  55 #include "exec/memory-internal.h"
  56 #include "exec/ram_addr.h"
  57 #include "exec/log.h"
  58
  59 #include "migration/vmstate.h"
  60
  61 #include "qemu/range.h"
  62 #ifndef _WIN32
  63 #include "qemu/mmap-alloc.h"
  64 #endif
  65
  66 //#define DEBUG_SUBPAGE
  67
  68 #if !defined(CONFIG_USER_ONLY)
  69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
  70  * are protected by the ramlist lock.
  71  */
  72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
  73
  74 static MemoryRegion *system_memory;
  75 static MemoryRegion *system_io;
  76
  77 AddressSpace address_space_io;
  78 AddressSpace address_space_memory;
  79
  80 MemoryRegion io_mem_rom, io_mem_notdirty;
  81 static MemoryRegion io_mem_unassigned;
  82
  83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
  84 #define RAM_PREALLOC   (1 << 0)
  85
  86 /* RAM is mmap-ed with MAP_SHARED */
  87 #define RAM_SHARED     (1 << 1)
  88
  89 /* Only a portion of RAM (used_length) is actually used, and migrated.
  90  * This used_length size can change across reboots.
  91  */
  92 #define RAM_RESIZEABLE (1 << 2)
  93
  94 #endif
  95
  96 #ifdef TARGET_PAGE_BITS_VARY
  97 int target_page_bits;
  98 bool target_page_bits_decided;
  99 #endif
 100
 101 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
 102 /* current CPU in the current thread. It is only valid inside
 103    cpu_exec() */
 104 __thread CPUState *current_cpu;
 105 /* 0 = Do not count executed instructions.
 106    1 = Precise instruction counting.
 107    2 = Adaptive rate instruction counting.  */
 108 int use_icount;
 109
 110 bool set_preferred_target_page_bits(int bits)
 111 {
 112     /* The target page size is the lowest common denominator for all
 113      * the CPUs in the system, so we can only make it smaller, never
 114      * larger. And we can't make it smaller once we've committed to
 115      * a particular size.
 116      */
 117 #ifdef TARGET_PAGE_BITS_VARY
 118     assert(bits >= TARGET_PAGE_BITS_MIN);
 119     if (target_page_bits == 0 || target_page_bits > bits) {
 120         if (target_page_bits_decided) {
 121             return false;
 122         }
 123         target_page_bits = bits;
 124     }
 125 #endif
 126     return true;
 127 }
 128
 129 #if !defined(CONFIG_USER_ONLY)
 130
 131 static void finalize_target_page_bits(void)
 132 {
 133 #ifdef TARGET_PAGE_BITS_VARY
 134     if (target_page_bits == 0) {
 135         target_page_bits = TARGET_PAGE_BITS_MIN;
 136     }
 137     target_page_bits_decided = true;
 138 #endif
 139 }
 140
 141 typedef struct PhysPageEntry PhysPageEntry;
 142
 143 struct PhysPageEntry {
 144     /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
 145     uint32_t skip : 6;
 146      /* index into phys_sections (!skip) or phys_map_nodes (skip) */
 147     uint32_t ptr : 26;
 148 };
 149
 150 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
 151
 152 /* Size of the L2 (and L3, etc) page tables.  */
 153 #define ADDR_SPACE_BITS 64
 154
 155 #define P_L2_BITS 9
 156 #define P_L2_SIZE (1 << P_L2_BITS)
 157
 158 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
 159
 160 typedef PhysPageEntry Node[P_L2_SIZE];
 161
 162 typedef struct PhysPageMap {
 163     struct rcu_head rcu;
 164
 165     unsigned sections_nb;
 166     unsigned sections_nb_alloc;
 167     unsigned nodes_nb;
 168     unsigned nodes_nb_alloc;
 169     Node *nodes;
 170     MemoryRegionSection *sections;
 171 } PhysPageMap;
 172
 173 struct AddressSpaceDispatch {
 174     struct rcu_head rcu;
 175
 176     MemoryRegionSection *mru_section;
 177     /* This is a multi-level map on the physical address space.
 178      * The bottom level has pointers to MemoryRegionSections.
 179      */
 180     PhysPageEntry phys_map;
 181     PhysPageMap map;
 182     AddressSpace *as;
 183 };
 184
 185 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 186 typedef struct subpage_t {
 187     MemoryRegion iomem;
 188     AddressSpace *as;
 189     hwaddr base;
 190     uint16_t sub_section[];
 191 } subpage_t;
 192
 193 #define PHYS_SECTION_UNASSIGNED 0
 194 #define PHYS_SECTION_NOTDIRTY 1
 195 #define PHYS_SECTION_ROM 2
 196 #define PHYS_SECTION_WATCH 3
 197
 198 static void io_mem_init(void);
 199 static void memory_map_init(void);
 200 static void tcg_commit(MemoryListener *listener);
 201
 202 static MemoryRegion io_mem_watch;
 203
 204 /**
 205  * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 206  * @cpu: the CPU whose AddressSpace this is
 207  * @as: the AddressSpace itself
 208  * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 209  * @tcg_as_listener: listener for tracking changes to the AddressSpace
 210  */
 211 struct CPUAddressSpace {
 212     CPUState *cpu;
 213     AddressSpace *as;
 214     struct AddressSpaceDispatch *memory_dispatch;
 215     MemoryListener tcg_as_listener;
 216 };
 217
 218 #endif
 219
 220 #if !defined(CONFIG_USER_ONLY)
 221
 222 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 223 {
 224     static unsigned alloc_hint = 16;
 225     if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 226         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
 227         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 228         map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 229         alloc_hint = map->nodes_nb_alloc;
 230     }
 231 }
 232
 233 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 234 {
 235     unsigned i;
 236     uint32_t ret;
 237     PhysPageEntry e;
 238     PhysPageEntry *p;
 239
 240     ret = map->nodes_nb++;
 241     p = map->nodes[ret];
 242     assert(ret != PHYS_MAP_NODE_NIL);
 243     assert(ret != map->nodes_nb_alloc);
 244
 245     e.skip = leaf ? 0 : 1;
 246     e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 247     for (i = 0; i < P_L2_SIZE; ++i) {
 248         memcpy(&p[i], &e, sizeof(e));
 249     }
 250     return ret;
 251 }
 252
 253 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 254                                 hwaddr *index, hwaddr *nb, uint16_t leaf,
 255                                 int level)
 256 {
 257     PhysPageEntry *p;
 258     hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 259
 260     if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 261         lp->ptr = phys_map_node_alloc(map, level == 0);
 262     }
 263     p = map->nodes[lp->ptr];
 264     lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 265
 266     while (*nb && lp < &p[P_L2_SIZE]) {
 267         if ((*index & (step - 1)) == 0 && *nb >= step) {
 268             lp->skip = 0;
 269             lp->ptr = leaf;
 270             *index += step;
 271             *nb -= step;
 272         } else {
 273             phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 274         }
 275         ++lp;
 276     }
 277 }
 278
 279 static void phys_page_set(AddressSpaceDispatch *d,
 280                           hwaddr index, hwaddr nb,
 281                           uint16_t leaf)
 282 {
 283     /* Wildly overreserve - it doesn't matter much. */
 284     phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 285
 286     phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 287 }
 288
 289 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
 290  * and update our entry so we can skip it and go directly to the destination.
 291  */
 292 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
 293 {
 294     unsigned valid_ptr = P_L2_SIZE;
 295     int valid = 0;
 296     PhysPageEntry *p;
 297     int i;
 298
 299     if (lp->ptr == PHYS_MAP_NODE_NIL) {
 300         return;
 301     }
 302
 303     p = nodes[lp->ptr];
 304     for (i = 0; i < P_L2_SIZE; i++) {
 305         if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 306             continue;
 307         }
 308
 309         valid_ptr = i;
 310         valid++;
 311         if (p[i].skip) {
 312             phys_page_compact(&p[i], nodes);
 313         }
 314     }
 315
 316     /* We can only compress if there's only one child. */
 317     if (valid != 1) {
 318         return;
 319     }
 320
 321     assert(valid_ptr < P_L2_SIZE);
 322
 323     /* Don't compress if it won't fit in the # of bits we have. */
 324     if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 325         return;
 326     }
 327
 328     lp->ptr = p[valid_ptr].ptr;
 329     if (!p[valid_ptr].skip) {
 330         /* If our only child is a leaf, make this a leaf. */
 331         /* By design, we should have made this node a leaf to begin with so we
 332          * should never reach here.
 333          * But since it's so simple to handle this, let's do it just in case we
 334          * change this rule.
 335          */
 336         lp->skip = 0;
 337     } else {
 338         lp->skip += p[valid_ptr].skip;
 339     }
 340 }
 341
 342 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 343 {
 344     if (d->phys_map.skip) {
 345         phys_page_compact(&d->phys_map, d->map.nodes);
 346     }
 347 }
 348
 349 static inline bool section_covers_addr(const MemoryRegionSection *section,
 350                                        hwaddr addr)
 351 {
 352     /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
 353      * the section must cover the entire address space.
 354      */
 355     return int128_gethi(section->size) ||
 356            range_covers_byte(section->offset_within_address_space,
 357                              int128_getlo(section->size), addr);
 358 }
 359
 360 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 361                                            Node *nodes, MemoryRegionSection *sections)
 362 {
 363     PhysPageEntry *p;
 364     hwaddr index = addr >> TARGET_PAGE_BITS;
 365     int i;
 366
 367     for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 368         if (lp.ptr == PHYS_MAP_NODE_NIL) {
 369             return &sections[PHYS_SECTION_UNASSIGNED];
 370         }
 371         p = nodes[lp.ptr];
 372         lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 373     }
 374
 375     if (section_covers_addr(&sections[lp.ptr], addr)) {
 376         return &sections[lp.ptr];
 377     } else {
 378         return &sections[PHYS_SECTION_UNASSIGNED];
 379     }
 380 }
 381
 382 bool memory_region_is_unassigned(MemoryRegion *mr)
 383 {
 384     return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 385         && mr != &io_mem_watch;
 386 }
 387
 388 /* Called from RCU critical section */
 389 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 390                                                         hwaddr addr,
 391                                                         bool resolve_subpage)
 392 {
 393     MemoryRegionSection *section = atomic_read(&d->mru_section);
 394     subpage_t *subpage;
 395     bool update;
 396
 397     if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
 398         section_covers_addr(section, addr)) {
 399         update = false;
 400     } else {
 401         section = phys_page_find(d->phys_map, addr, d->map.nodes,
 402                                  d->map.sections);
 403         update = true;
 404     }
 405     if (resolve_subpage && section->mr->subpage) {
 406         subpage = container_of(section->mr, subpage_t, iomem);
 407         section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 408     }
 409     if (update) {
 410         atomic_set(&d->mru_section, section);
 411     }
 412     return section;
 413 }
 414
 415 /* Called from RCU critical section */
 416 static MemoryRegionSection *
 417 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 418                                  hwaddr *plen, bool resolve_subpage)
 419 {
 420     MemoryRegionSection *section;
 421     MemoryRegion *mr;
 422     Int128 diff;
 423
 424     section = address_space_lookup_region(d, addr, resolve_subpage);
 425     /* Compute offset within MemoryRegionSection */
 426     addr -= section->offset_within_address_space;
 427
 428     /* Compute offset within MemoryRegion */
 429     *xlat = addr + section->offset_within_region;
 430
 431     mr = section->mr;
 432
 433     /* MMIO registers can be expected to perform full-width accesses based only
 434      * on their address, without considering adjacent registers that could
 435      * decode to completely different MemoryRegions.  When such registers
 436      * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 437      * regions overlap wildly.  For this reason we cannot clamp the accesses
 438      * here.
 439      *
 440      * If the length is small (as is the case for address_space_ldl/stl),
 441      * everything works fine.  If the incoming length is large, however,
 442      * the caller really has to do the clamping through memory_access_size.
 443      */
 444     if (memory_region_is_ram(mr)) {
 445         diff = int128_sub(section->size, int128_make64(addr));
 446         *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 447     }
 448     return section;
 449 }
 450
 451 /* Called from RCU critical section */
 452 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 453                                       hwaddr *xlat, hwaddr *plen,
 454                                       bool is_write)
 455 {
 456     IOMMUTLBEntry iotlb;
 457     MemoryRegionSection *section;
 458     MemoryRegion *mr;
 459
 460     for (;;) {
 461         AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 462         section = address_space_translate_internal(d, addr, &addr, plen, true);
 463         mr = section->mr;
 464
 465         if (!mr->iommu_ops) {
 466             break;
 467         }
 468
 469         iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 470         addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 471                 | (addr & iotlb.addr_mask));
 472         *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
 473         if (!(iotlb.perm & (1 << is_write))) {
 474             mr = &io_mem_unassigned;
 475             break;
 476         }
 477
 478         as = iotlb.target_as;
 479     }
 480
 481     if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 482         hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 483         *plen = MIN(page, *plen);
 484     }
 485
 486     *xlat = addr;
 487     return mr;
 488 }
 489
 490 /* Called from RCU critical section */
 491 MemoryRegionSection *
 492 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
 493                                   hwaddr *xlat, hwaddr *plen)
 494 {
 495     MemoryRegionSection *section;
 496     AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
 497
 498     section = address_space_translate_internal(d, addr, xlat, plen, false);
 499
 500     assert(!section->mr->iommu_ops);
 501     return section;
 502 }
 503 #endif
 504
 505 #if !defined(CONFIG_USER_ONLY)
 506
 507 static int cpu_common_post_load(void *opaque, int version_id)
 508 {
 509     CPUState *cpu = opaque;
 510
 511     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 512        version_id is increased. */
 513     cpu->interrupt_request &= ~0x01;
 514     tlb_flush(cpu, 1);
 515
 516     return 0;
 517 }
 518
 519 static int cpu_common_pre_load(void *opaque)
 520 {
 521     CPUState *cpu = opaque;
 522
 523     cpu->exception_index = -1;
 524
 525     return 0;
 526 }
 527
 528 static bool cpu_common_exception_index_needed(void *opaque)
 529 {
 530     CPUState *cpu = opaque;
 531
 532     return tcg_enabled() && cpu->exception_index != -1;
 533 }
 534
 535 static const VMStateDescription vmstate_cpu_common_exception_index = {
 536     .name = "cpu_common/exception_index",
 537     .version_id = 1,
 538     .minimum_version_id = 1,
 539     .needed = cpu_common_exception_index_needed,
 540     .fields = (VMStateField[]) {
 541         VMSTATE_INT32(exception_index, CPUState),
 542         VMSTATE_END_OF_LIST()
 543     }
 544 };
 545
 546 static bool cpu_common_crash_occurred_needed(void *opaque)
 547 {
 548     CPUState *cpu = opaque;
 549
 550     return cpu->crash_occurred;
 551 }
 552
 553 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
 554     .name = "cpu_common/crash_occurred",
 555     .version_id = 1,
 556     .minimum_version_id = 1,
 557     .needed = cpu_common_crash_occurred_needed,
 558     .fields = (VMStateField[]) {
 559         VMSTATE_BOOL(crash_occurred, CPUState),
 560         VMSTATE_END_OF_LIST()
 561     }
 562 };
 563
 564 const VMStateDescription vmstate_cpu_common = {
 565     .name = "cpu_common",
 566     .version_id = 1,
 567     .minimum_version_id = 1,
 568     .pre_load = cpu_common_pre_load,
 569     .post_load = cpu_common_post_load,
 570     .fields = (VMStateField[]) {
 571         VMSTATE_UINT32(halted, CPUState),
 572         VMSTATE_UINT32(interrupt_request, CPUState),
 573         VMSTATE_END_OF_LIST()
 574     },
 575     .subsections = (const VMStateDescription*[]) {
 576         &vmstate_cpu_common_exception_index,
 577         &vmstate_cpu_common_crash_occurred,
 578         NULL
 579     }
 580 };
 581
 582 #endif
 583
 584 CPUState *qemu_get_cpu(int index)
 585 {
 586     CPUState *cpu;
 587
 588     CPU_FOREACH(cpu) {
 589         if (cpu->cpu_index == index) {
 590             return cpu;
 591         }
 592     }
 593
 594     return NULL;
 595 }
 596
 597 #if !defined(CONFIG_USER_ONLY)
 598 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
 599 {
 600     CPUAddressSpace *newas;
 601
 602     /* Target code should have set num_ases before calling us */
 603     assert(asidx < cpu->num_ases);
 604
 605     if (asidx == 0) {
 606         /* address space 0 gets the convenience alias */
 607         cpu->as = as;
 608     }
 609
 610     /* KVM cannot currently support multiple address spaces. */
 611     assert(asidx == 0 || !kvm_enabled());
 612
 613     if (!cpu->cpu_ases) {
 614         cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
 615     }
 616
 617     newas = &cpu->cpu_ases[asidx];
 618     newas->cpu = cpu;
 619     newas->as = as;
 620     if (tcg_enabled()) {
 621         newas->tcg_as_listener.commit = tcg_commit;
 622         memory_listener_register(&newas->tcg_as_listener, as);
 623     }
 624 }
 625
 626 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
 627 {
 628     /* Return the AddressSpace corresponding to the specified index */
 629     return cpu->cpu_ases[asidx].as;
 630 }
 631 #endif
 632
 633 void cpu_exec_unrealizefn(CPUState *cpu)
 634 {
 635     CPUClass *cc = CPU_GET_CLASS(cpu);
 636
 637     cpu_list_remove(cpu);
 638
 639     if (cc->vmsd != NULL) {
 640         vmstate_unregister(NULL, cc->vmsd, cpu);
 641     }
 642     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 643         vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
 644     }
 645 }
 646
 647 void cpu_exec_initfn(CPUState *cpu)
 648 {
 649     cpu->as = NULL;
 650     cpu->num_ases = 0;
 651
 652 #ifndef CONFIG_USER_ONLY
 653     cpu->thread_id = qemu_get_thread_id();
 654
 655     /* This is a softmmu CPU object, so create a property for it
 656      * so users can wire up its memory. (This can't go in qom/cpu.c
 657      * because that file is compiled only once for both user-mode
 658      * and system builds.) The default if no link is set up is to use
 659      * the system address space.
 660      */
 661     object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
 662                              (Object **)&cpu->memory,
 663                              qdev_prop_allow_set_link_before_realize,
 664                              OBJ_PROP_LINK_UNREF_ON_RELEASE,
 665                              &error_abort);
 666     cpu->memory = system_memory;
 667     object_ref(OBJECT(cpu->memory));
 668 #endif
 669 }
 670
 671 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
 672 {
 673     CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
 674
 675     cpu_list_add(cpu);
 676
 677 #ifndef CONFIG_USER_ONLY
 678     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 679         vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
 680     }
 681     if (cc->vmsd != NULL) {
 682         vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
 683     }
 684 #endif
 685 }
 686
 687 #if defined(CONFIG_USER_ONLY)
 688 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 689 {
 690     mmap_lock();
 691     tb_lock();
 692     tb_invalidate_phys_page_range(pc, pc + 1, 0);
 693     tb_unlock();
 694     mmap_unlock();
 695 }
 696 #else
 697 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 698 {
 699     MemTxAttrs attrs;
 700     hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
 701     int asidx = cpu_asidx_from_attrs(cpu, attrs);
 702     if (phys != -1) {
 703         /* Locks grabbed by tb_invalidate_phys_addr */
 704         tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
 705                                 phys | (pc & ~TARGET_PAGE_MASK));
 706     }
 707 }
 708 #endif
 709
 710 #if defined(CONFIG_USER_ONLY)
 711 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 712
 713 {
 714 }
 715
 716 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 717                           int flags)
 718 {
 719     return -ENOSYS;
 720 }
 721
 722 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 723 {
 724 }
 725
 726 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 727                           int flags, CPUWatchpoint **watchpoint)
 728 {
 729     return -ENOSYS;
 730 }
 731 #else
 732 /* Add a watchpoint.  */
 733 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 734                           int flags, CPUWatchpoint **watchpoint)
 735 {
 736     CPUWatchpoint *wp;
 737
 738     /* forbid ranges which are empty or run off the end of the address space */
 739     if (len == 0 || (addr + len - 1) < addr) {
 740         error_report("tried to set invalid watchpoint at %"
 741                      VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 742         return -EINVAL;
 743     }
 744     wp = g_malloc(sizeof(*wp));
 745
 746     wp->vaddr = addr;
 747     wp->len = len;
 748     wp->flags = flags;
 749
 750     /* keep all GDB-injected watchpoints in front */
 751     if (flags & BP_GDB) {
 752         QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 753     } else {
 754         QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 755     }
 756
 757     tlb_flush_page(cpu, addr);
 758
 759     if (watchpoint)
 760         *watchpoint = wp;
 761     return 0;
 762 }
 763
 764 /* Remove a specific watchpoint.  */
 765 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 766                           int flags)
 767 {
 768     CPUWatchpoint *wp;
 769
 770     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 771         if (addr == wp->vaddr && len == wp->len
 772                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 773             cpu_watchpoint_remove_by_ref(cpu, wp);
 774             return 0;
 775         }
 776     }
 777     return -ENOENT;
 778 }
 779
 780 /* Remove a specific watchpoint by reference.  */
 781 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 782 {
 783     QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 784
 785     tlb_flush_page(cpu, watchpoint->vaddr);
 786
 787     g_free(watchpoint);
 788 }
 789
 790 /* Remove all matching watchpoints.  */
 791 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 792 {
 793     CPUWatchpoint *wp, *next;
 794
 795     QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 796         if (wp->flags & mask) {
 797             cpu_watchpoint_remove_by_ref(cpu, wp);
 798         }
 799     }
 800 }
 801
 802 /* Return true if this watchpoint address matches the specified
 803  * access (ie the address range covered by the watchpoint overlaps
 804  * partially or completely with the address range covered by the
 805  * access).
 806  */
 807 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 808                                                   vaddr addr,
 809                                                   vaddr len)
 810 {
 811     /* We know the lengths are non-zero, but a little caution is
 812      * required to avoid errors in the case where the range ends
 813      * exactly at the top of the address space and so addr + len
 814      * wraps round to zero.
 815      */
 816     vaddr wpend = wp->vaddr + wp->len - 1;
 817     vaddr addrend = addr + len - 1;
 818
 819     return !(addr > wpend || wp->vaddr > addrend);
 820 }
 821
 822 #endif
 823
 824 /* Add a breakpoint.  */
 825 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 826                           CPUBreakpoint **breakpoint)
 827 {
 828     CPUBreakpoint *bp;
 829
 830     bp = g_malloc(sizeof(*bp));
 831
 832     bp->pc = pc;
 833     bp->flags = flags;
 834
 835     /* keep all GDB-injected breakpoints in front */
 836     if (flags & BP_GDB) {
 837         QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 838     } else {
 839         QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 840     }
 841
 842     breakpoint_invalidate(cpu, pc);
 843
 844     if (breakpoint) {
 845         *breakpoint = bp;
 846     }
 847     return 0;
 848 }
 849
 850 /* Remove a specific breakpoint.  */
 851 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 852 {
 853     CPUBreakpoint *bp;
 854
 855     QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 856         if (bp->pc == pc && bp->flags == flags) {
 857             cpu_breakpoint_remove_by_ref(cpu, bp);
 858             return 0;
 859         }
 860     }
 861     return -ENOENT;
 862 }
 863
 864 /* Remove a specific breakpoint by reference.  */
 865 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 866 {
 867     QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 868
 869     breakpoint_invalidate(cpu, breakpoint->pc);
 870
 871     g_free(breakpoint);
 872 }
 873
 874 /* Remove all matching breakpoints. */
 875 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 876 {
 877     CPUBreakpoint *bp, *next;
 878
 879     QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 880         if (bp->flags & mask) {
 881             cpu_breakpoint_remove_by_ref(cpu, bp);
 882         }
 883     }
 884 }
 885
 886 /* enable or disable single step mode. EXCP_DEBUG is returned by the
 887    CPU loop after each instruction */
 888 void cpu_single_step(CPUState *cpu, int enabled)
 889 {
 890     if (cpu->singlestep_enabled != enabled) {
 891         cpu->singlestep_enabled = enabled;
 892         if (kvm_enabled()) {
 893             kvm_update_guest_debug(cpu, 0);
 894         } else {
 895             /* must flush all the translated code to avoid inconsistencies */
 896             /* XXX: only flush what is necessary */
 897             tb_flush(cpu);
 898         }
 899     }
 900 }
 901
 902 void cpu_abort(CPUState *cpu, const char *fmt, ...)
 903 {
 904     va_list ap;
 905     va_list ap2;
 906
 907     va_start(ap, fmt);
 908     va_copy(ap2, ap);
 909     fprintf(stderr, "qemu: fatal: ");
 910     vfprintf(stderr, fmt, ap);
 911     fprintf(stderr, "\n");
 912     cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 913     if (qemu_log_separate()) {
 914         qemu_log("qemu: fatal: ");
 915         qemu_log_vprintf(fmt, ap2);
 916         qemu_log("\n");
 917         log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 918         qemu_log_flush();
 919         qemu_log_close();
 920     }
 921     va_end(ap2);
 922     va_end(ap);
 923     replay_finish();
 924 #if defined(CONFIG_USER_ONLY)
 925     {
 926         struct sigaction act;
 927         sigfillset(&act.sa_mask);
 928         act.sa_handler = SIG_DFL;
 929         sigaction(SIGABRT, &act, NULL);
 930     }
 931 #endif
 932     abort();
 933 }
 934
 935 #if !defined(CONFIG_USER_ONLY)
 936 /* Called from RCU critical section */
 937 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 938 {
 939     RAMBlock *block;
 940
 941     block = atomic_rcu_read(&ram_list.mru_block);
 942     if (block && addr - block->offset < block->max_length) {
 943         return block;
 944     }
 945     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 946         if (addr - block->offset < block->max_length) {
 947             goto found;
 948         }
 949     }
 950
 951     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
 952     abort();
 953
 954 found:
 955     /* It is safe to write mru_block outside the iothread lock.  This
 956      * is what happens:
 957      *
 958      *     mru_block = xxx
 959      *     rcu_read_unlock()
 960      *                                        xxx removed from list
 961      *                  rcu_read_lock()
 962      *                  read mru_block
 963      *                                        mru_block = NULL;
 964      *                                        call_rcu(reclaim_ramblock, xxx);
 965      *                  rcu_read_unlock()
 966      *
 967      * atomic_rcu_set is not needed here.  The block was already published
 968      * when it was placed into the list.  Here we're just making an extra
 969      * copy of the pointer.
 970      */
 971     ram_list.mru_block = block;
 972     return block;
 973 }
 974
 975 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 976 {
 977     CPUState *cpu;
 978     ram_addr_t start1;
 979     RAMBlock *block;
 980     ram_addr_t end;
 981
 982     end = TARGET_PAGE_ALIGN(start + length);
 983     start &= TARGET_PAGE_MASK;
 984
 985     rcu_read_lock();
 986     block = qemu_get_ram_block(start);
 987     assert(block == qemu_get_ram_block(end - 1));
 988     start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
 989     CPU_FOREACH(cpu) {
 990         tlb_reset_dirty(cpu, start1, length);
 991     }
 992     rcu_read_unlock();
 993 }
 994
 995 /* Note: start and end must be within the same ram block.  */
 996 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
 997                                               ram_addr_t length,
 998                                               unsigned client)
 999 {
1000     DirtyMemoryBlocks *blocks;
1001     unsigned long end, page;
1002     bool dirty = false;
1003
1004     if (length == 0) {
1005         return false;
1006     }
1007
1008     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1009     page = start >> TARGET_PAGE_BITS;
1010
1011     rcu_read_lock();
1012
1013     blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1014
1015     while (page < end) {
1016         unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1017         unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1018         unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1019
1020         dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1021                                               offset, num);
1022         page += num;
1023     }
1024
1025     rcu_read_unlock();
1026
1027     if (dirty && tcg_enabled()) {
1028         tlb_reset_dirty_range_all(start, length);
1029     }
1030
1031     return dirty;
1032 }
1033
1034 /* Called from RCU critical section */
1035 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1036                                        MemoryRegionSection *section,
1037                                        target_ulong vaddr,
1038                                        hwaddr paddr, hwaddr xlat,
1039                                        int prot,
1040                                        target_ulong *address)
1041 {
1042     hwaddr iotlb;
1043     CPUWatchpoint *wp;
1044
1045     if (memory_region_is_ram(section->mr)) {
1046         /* Normal RAM.  */
1047         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1048         if (!section->readonly) {
1049             iotlb |= PHYS_SECTION_NOTDIRTY;
1050         } else {
1051             iotlb |= PHYS_SECTION_ROM;
1052         }
1053     } else {
1054         AddressSpaceDispatch *d;
1055
1056         d = atomic_rcu_read(&section->address_space->dispatch);
1057         iotlb = section - d->map.sections;
1058         iotlb += xlat;
1059     }
1060
1061     /* Make accesses to pages with watchpoints go via the
1062        watchpoint trap routines.  */
1063     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1064         if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1065             /* Avoid trapping reads of pages with a write breakpoint. */
1066             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1067                 iotlb = PHYS_SECTION_WATCH + paddr;
1068                 *address |= TLB_MMIO;
1069                 break;
1070             }
1071         }
1072     }
1073
1074     return iotlb;
1075 }
#endif /* !defined(CONFIG_USER_ONLY) */
1077
1078 #if !defined(CONFIG_USER_ONLY)
1079
1080 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1081                              uint16_t section);
1082 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1083
1084 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1085                                qemu_anon_ram_alloc;
1086
1087 /*
1088  * Set a custom physical guest memory alloator.
1089  * Accelerators with unusual needs may need this.  Hopefully, we can
1090  * get rid of it eventually.
1091  */
1092 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1093 {
1094     phys_mem_alloc = alloc;
1095 }
1096
1097 static uint16_t phys_section_add(PhysPageMap *map,
1098                                  MemoryRegionSection *section)
1099 {
1100     /* The physical section number is ORed with a page-aligned
1101      * pointer to produce the iotlb entries.  Thus it should
1102      * never overflow into the page-aligned value.
1103      */
1104     assert(map->sections_nb < TARGET_PAGE_SIZE);
1105
1106     if (map->sections_nb == map->sections_nb_alloc) {
1107         map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1108         map->sections = g_renew(MemoryRegionSection, map->sections,
1109                                 map->sections_nb_alloc);
1110     }
1111     map->sections[map->sections_nb] = *section;
1112     memory_region_ref(section->mr);
1113     return map->sections_nb++;
1114 }
1115
1116 static void phys_section_destroy(MemoryRegion *mr)
1117 {
1118     bool have_sub_page = mr->subpage;
1119
1120     memory_region_unref(mr);
1121
1122     if (have_sub_page) {
1123         subpage_t *subpage = container_of(mr, subpage_t, iomem);
1124         object_unref(OBJECT(&subpage->iomem));
1125         g_free(subpage);
1126     }
1127 }
1128
1129 static void phys_sections_free(PhysPageMap *map)
1130 {
1131     while (map->sections_nb > 0) {
1132         MemoryRegionSection *section = &map->sections[--map->sections_nb];
1133         phys_section_destroy(section->mr);
1134     }
1135     g_free(map->sections);
1136     g_free(map->nodes);
1137 }
1138
1139 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1140 {
1141     subpage_t *subpage;
1142     hwaddr base = section->offset_within_address_space
1143         & TARGET_PAGE_MASK;
1144     MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1145                                                    d->map.nodes, d->map.sections);
1146     MemoryRegionSection subsection = {
1147         .offset_within_address_space = base,
1148         .size = int128_make64(TARGET_PAGE_SIZE),
1149     };
1150     hwaddr start, end;
1151
1152     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1153
1154     if (!(existing->mr->subpage)) {
1155         subpage = subpage_init(d->as, base);
1156         subsection.address_space = d->as;
1157         subsection.mr = &subpage->iomem;
1158         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1159                       phys_section_add(&d->map, &subsection));
1160     } else {
1161         subpage = container_of(existing->mr, subpage_t, iomem);
1162     }
1163     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1164     end = start + int128_get64(section->size) - 1;
1165     subpage_register(subpage, start, end,
1166                      phys_section_add(&d->map, section));
1167 }
1168
1169
1170 static void register_multipage(AddressSpaceDispatch *d,
1171                                MemoryRegionSection *section)
1172 {
1173     hwaddr start_addr = section->offset_within_address_space;
1174     uint16_t section_index = phys_section_add(&d->map, section);
1175     uint64_t num_pages = int128_get64(int128_rshift(section->size,
1176                                                     TARGET_PAGE_BITS));
1177
1178     assert(num_pages);
1179     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1180 }
1181
1182 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1183 {
1184     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1185     AddressSpaceDispatch *d = as->next_dispatch;
1186     MemoryRegionSection now = *section, remain = *section;
1187     Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1188
1189     if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1190         uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1191                        - now.offset_within_address_space;
1192
1193         now.size = int128_min(int128_make64(left), now.size);
1194         register_subpage(d, &now);
1195     } else {
1196         now.size = int128_zero();
1197     }
1198     while (int128_ne(remain.size, now.size)) {
1199         remain.size = int128_sub(remain.size, now.size);
1200         remain.offset_within_address_space += int128_get64(now.size);
1201         remain.offset_within_region += int128_get64(now.size);
1202         now = remain;
1203         if (int128_lt(remain.size, page_size)) {
1204             register_subpage(d, &now);
1205         } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1206             now.size = page_size;
1207             register_subpage(d, &now);
1208         } else {
1209             now.size = int128_and(now.size, int128_neg(page_size));
1210             register_multipage(d, &now);
1211         }
1212     }
1213 }
1214
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1220
1221 void qemu_mutex_lock_ramlist(void)
1222 {
1223     qemu_mutex_lock(&ram_list.mutex);
1224 }
1225
1226 void qemu_mutex_unlock_ramlist(void)
1227 {
1228     qemu_mutex_unlock(&ram_list.mutex);
1229 }
1230
1231 #ifdef __linux__
1232 static void *file_ram_alloc(RAMBlock *block,
1233                             ram_addr_t memory,
1234                             const char *path,
1235                             Error **errp)
1236 {
1237     bool unlink_on_error = false;
1238     char *filename;
1239     char *sanitized_name;
1240     char *c;
1241     void *area = MAP_FAILED;
1242     int fd = -1;
1243
1244     if (kvm_enabled() && !kvm_has_sync_mmu()) {
1245         error_setg(errp,
1246                    "host lacks kvm mmu notifiers, -mem-path unsupported");
1247         return NULL;
1248     }
1249
1250     for (;;) {
1251         fd = open(path, O_RDWR);
1252         if (fd >= 0) {
1253             /* @path names an existing file, use it */
1254             break;
1255         }
1256         if (errno == ENOENT) {
1257             /* @path names a file that doesn't exist, create it */
1258             fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1259             if (fd >= 0) {
1260                 unlink_on_error = true;
1261                 break;
1262             }
1263         } else if (errno == EISDIR) {
1264             /* @path names a directory, create a file there */
1265             /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1266             sanitized_name = g_strdup(memory_region_name(block->mr));
1267             for (c = sanitized_name; *c != '\0'; c++) {
1268                 if (*c == '/') {
1269                     *c = '_';
1270                 }
1271             }
1272
1273             filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1274                                        sanitized_name);
1275             g_free(sanitized_name);
1276
1277             fd = mkstemp(filename);
1278             if (fd >= 0) {
1279                 unlink(filename);
1280                 g_free(filename);
1281                 break;
1282             }
1283             g_free(filename);
1284         }
1285         if (errno != EEXIST && errno != EINTR) {
1286             error_setg_errno(errp, errno,
1287                              "can't open backing store %s for guest RAM",
1288                              path);
1289             goto error;
1290         }
1291         /*
1292          * Try again on EINTR and EEXIST.  The latter happens when
1293          * something else creates the file between our two open().
1294          */
1295     }
1296
1297     block->page_size = qemu_fd_getpagesize(fd);
1298     block->mr->align = block->page_size;
1299 #if defined(__s390x__)
1300     if (kvm_enabled()) {
1301         block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1302     }
1303 #endif
1304
1305     if (memory < block->page_size) {
1306         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1307                    "or larger than page size 0x%zx",
1308                    memory, block->page_size);
1309         goto error;
1310     }
1311
1312     memory = ROUND_UP(memory, block->page_size);
1313
1314     /*
1315      * ftruncate is not supported by hugetlbfs in older
1316      * hosts, so don't bother bailing out on errors.
1317      * If anything goes wrong with it under other filesystems,
1318      * mmap will fail.
1319      */
1320     if (ftruncate(fd, memory)) {
1321         perror("ftruncate");
1322     }
1323
1324     area = qemu_ram_mmap(fd, memory, block->mr->align,
1325                          block->flags & RAM_SHARED);
1326     if (area == MAP_FAILED) {
1327         error_setg_errno(errp, errno,
1328                          "unable to map backing store for guest RAM");
1329         goto error;
1330     }
1331
1332     if (mem_prealloc) {
1333         os_mem_prealloc(fd, area, memory, errp);
1334         if (errp && *errp) {
1335             goto error;
1336         }
1337     }
1338
1339     block->fd = fd;
1340     return area;
1341
1342 error:
1343     if (area != MAP_FAILED) {
1344         qemu_ram_munmap(area, memory);
1345     }
1346     if (unlink_on_error) {
1347         unlink(path);
1348     }
1349     if (fd != -1) {
1350         close(fd);
1351     }
1352     return NULL;
1353 }
1354 #endif
1355
1356 /* Called with the ramlist lock held.  */
1357 static ram_addr_t find_ram_offset(ram_addr_t size)
1358 {
1359     RAMBlock *block, *next_block;
1360     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1361
1362     assert(size != 0); /* it would hand out same offset multiple times */
1363
1364     if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1365         return 0;
1366     }
1367
1368     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1369         ram_addr_t end, next = RAM_ADDR_MAX;
1370
1371         end = block->offset + block->max_length;
1372
1373         QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1374             if (next_block->offset >= end) {
1375                 next = MIN(next, next_block->offset);
1376             }
1377         }
1378         if (next - end >= size && next - end < mingap) {
1379             offset = end;
1380             mingap = next - end;
1381         }
1382     }
1383
1384     if (offset == RAM_ADDR_MAX) {
1385         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1386                 (uint64_t)size);
1387         abort();
1388     }
1389
1390     return offset;
1391 }
1392
1393 ram_addr_t last_ram_offset(void)
1394 {
1395     RAMBlock *block;
1396     ram_addr_t last = 0;
1397
1398     rcu_read_lock();
1399     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1400         last = MAX(last, block->offset + block->max_length);
1401     }
1402     rcu_read_unlock();
1403     return last;
1404 }
1405
1406 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1407 {
1408     int ret;
1409
1410     /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1411     if (!machine_dump_guest_core(current_machine)) {
1412         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1413         if (ret) {
1414             perror("qemu_madvise");
1415             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1416                             "but dump_guest_core=off specified\n");
1417         }
1418     }
1419 }
1420
1421 const char *qemu_ram_get_idstr(RAMBlock *rb)
1422 {
1423     return rb->idstr;
1424 }
1425
1426 /* Called with iothread lock held.  */
1427 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1428 {
1429     RAMBlock *block;
1430
1431     assert(new_block);
1432     assert(!new_block->idstr[0]);
1433
1434     if (dev) {
1435         char *id = qdev_get_dev_path(dev);
1436         if (id) {
1437             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1438             g_free(id);
1439         }
1440     }
1441     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1442
1443     rcu_read_lock();
1444     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1445         if (block != new_block &&
1446             !strcmp(block->idstr, new_block->idstr)) {
1447             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1448                     new_block->idstr);
1449             abort();
1450         }
1451     }
1452     rcu_read_unlock();
1453 }
1454
1455 /* Called with iothread lock held.  */
1456 void qemu_ram_unset_idstr(RAMBlock *block)
1457 {
1458     /* FIXME: arch_init.c assumes that this is not called throughout
1459      * migration.  Ignore the problem since hot-unplug during migration
1460      * does not work anyway.
1461      */
1462     if (block) {
1463         memset(block->idstr, 0, sizeof(block->idstr));
1464     }
1465 }
1466
1467 size_t qemu_ram_pagesize(RAMBlock *rb)
1468 {
1469     return rb->page_size;
1470 }
1471
1472 static int memory_try_enable_merging(void *addr, size_t len)
1473 {
1474     if (!machine_mem_merge(current_machine)) {
1475         /* disabled by the user */
1476         return 0;
1477     }
1478
1479     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1480 }
1481
1482 /* Only legal before guest might have detected the memory size: e.g. on
1483  * incoming migration, or right after reset.
1484  *
1485  * As memory core doesn't know how is memory accessed, it is up to
1486  * resize callback to update device state and/or add assertions to detect
1487  * misuse, if necessary.
1488  */
1489 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1490 {
1491     assert(block);
1492
1493     newsize = HOST_PAGE_ALIGN(newsize);
1494
1495     if (block->used_length == newsize) {
1496         return 0;
1497     }
1498
1499     if (!(block->flags & RAM_RESIZEABLE)) {
1500         error_setg_errno(errp, EINVAL,
1501                          "Length mismatch: %s: 0x" RAM_ADDR_FMT
1502                          " in != 0x" RAM_ADDR_FMT, block->idstr,
1503                          newsize, block->used_length);
1504         return -EINVAL;
1505     }
1506
1507     if (block->max_length < newsize) {
1508         error_setg_errno(errp, EINVAL,
1509                          "Length too large: %s: 0x" RAM_ADDR_FMT
1510                          " > 0x" RAM_ADDR_FMT, block->idstr,
1511                          newsize, block->max_length);
1512         return -EINVAL;
1513     }
1514
1515     cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1516     block->used_length = newsize;
1517     cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1518                                         DIRTY_CLIENTS_ALL);
1519     memory_region_set_size(block->mr, newsize);
1520     if (block->resized) {
1521         block->resized(block->idstr, newsize, block->host);
1522     }
1523     return 0;
1524 }
1525
1526 /* Called with ram_list.mutex held */
1527 static void dirty_memory_extend(ram_addr_t old_ram_size,
1528                                 ram_addr_t new_ram_size)
1529 {
1530     ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1531                                              DIRTY_MEMORY_BLOCK_SIZE);
1532     ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1533                                              DIRTY_MEMORY_BLOCK_SIZE);
1534     int i;
1535
1536     /* Only need to extend if block count increased */
1537     if (new_num_blocks <= old_num_blocks) {
1538         return;
1539     }
1540
1541     for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1542         DirtyMemoryBlocks *old_blocks;
1543         DirtyMemoryBlocks *new_blocks;
1544         int j;
1545
1546         old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1547         new_blocks = g_malloc(sizeof(*new_blocks) +
1548                               sizeof(new_blocks->blocks[0]) * new_num_blocks);
1549
1550         if (old_num_blocks) {
1551             memcpy(new_blocks->blocks, old_blocks->blocks,
1552                    old_num_blocks * sizeof(old_blocks->blocks[0]));
1553         }
1554
1555         for (j = old_num_blocks; j < new_num_blocks; j++) {
1556             new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1557         }
1558
1559         atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1560
1561         if (old_blocks) {
1562             g_free_rcu(old_blocks, rcu);
1563         }
1564     }
1565 }
1566
1567 static void ram_block_add(RAMBlock *new_block, Error **errp)
1568 {
1569     RAMBlock *block;
1570     RAMBlock *last_block = NULL;
1571     ram_addr_t old_ram_size, new_ram_size;
1572     Error *err = NULL;
1573
1574     old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1575
1576     qemu_mutex_lock_ramlist();
1577     new_block->offset = find_ram_offset(new_block->max_length);
1578
1579     if (!new_block->host) {
1580         if (xen_enabled()) {
1581             xen_ram_alloc(new_block->offset, new_block->max_length,
1582                           new_block->mr, &err);
1583             if (err) {
1584                 error_propagate(errp, err);
1585                 qemu_mutex_unlock_ramlist();
1586                 return;
1587             }
1588         } else {
1589             new_block->host = phys_mem_alloc(new_block->max_length,
1590                                              &new_block->mr->align);
1591             if (!new_block->host) {
1592                 error_setg_errno(errp, errno,
1593                                  "cannot set up guest memory '%s'",
1594                                  memory_region_name(new_block->mr));
1595                 qemu_mutex_unlock_ramlist();
1596                 return;
1597             }
1598             memory_try_enable_merging(new_block->host, new_block->max_length);
1599         }
1600     }
1601
1602     new_ram_size = MAX(old_ram_size,
1603               (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1604     if (new_ram_size > old_ram_size) {
1605         migration_bitmap_extend(old_ram_size, new_ram_size);
1606         dirty_memory_extend(old_ram_size, new_ram_size);
1607     }
1608     /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1609      * QLIST (which has an RCU-friendly variant) does not have insertion at
1610      * tail, so save the last element in last_block.
1611      */
1612     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1613         last_block = block;
1614         if (block->max_length < new_block->max_length) {
1615             break;
1616         }
1617     }
1618     if (block) {
1619         QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1620     } else if (last_block) {
1621         QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1622     } else { /* list is empty */
1623         QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1624     }
1625     ram_list.mru_block = NULL;
1626
1627     /* Write list before version */
1628     smp_wmb();
1629     ram_list.version++;
1630     qemu_mutex_unlock_ramlist();
1631
1632     cpu_physical_memory_set_dirty_range(new_block->offset,
1633                                         new_block->used_length,
1634                                         DIRTY_CLIENTS_ALL);
1635
1636     if (new_block->host) {
1637         qemu_ram_setup_dump(new_block->host, new_block->max_length);
1638         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1639         /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1640         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1641     }
1642 }
1643
1644 #ifdef __linux__
1645 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1646                                    bool share, const char *mem_path,
1647                                    Error **errp)
1648 {
1649     RAMBlock *new_block;
1650     Error *local_err = NULL;
1651
1652     if (xen_enabled()) {
1653         error_setg(errp, "-mem-path not supported with Xen");
1654         return NULL;
1655     }
1656
1657     if (phys_mem_alloc != qemu_anon_ram_alloc) {
1658         /*
1659          * file_ram_alloc() needs to allocate just like
1660          * phys_mem_alloc, but we haven't bothered to provide
1661          * a hook there.
1662          */
1663         error_setg(errp,
1664                    "-mem-path not supported with this accelerator");
1665         return NULL;
1666     }
1667
1668     size = HOST_PAGE_ALIGN(size);
1669     new_block = g_malloc0(sizeof(*new_block));
1670     new_block->mr = mr;
1671     new_block->used_length = size;
1672     new_block->max_length = size;
1673     new_block->flags = share ? RAM_SHARED : 0;
1674     new_block->host = file_ram_alloc(new_block, size,
1675                                      mem_path, errp);
1676     if (!new_block->host) {
1677         g_free(new_block);
1678         return NULL;
1679     }
1680
1681     ram_block_add(new_block, &local_err);
1682     if (local_err) {
1683         g_free(new_block);
1684         error_propagate(errp, local_err);
1685         return NULL;
1686     }
1687     return new_block;
1688 }
1689 #endif
1690
1691 static
1692 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1693                                   void (*resized)(const char*,
1694                                                   uint64_t length,
1695                                                   void *host),
1696                                   void *host, bool resizeable,
1697                                   MemoryRegion *mr, Error **errp)
1698 {
1699     RAMBlock *new_block;
1700     Error *local_err = NULL;
1701
1702     size = HOST_PAGE_ALIGN(size);
1703     max_size = HOST_PAGE_ALIGN(max_size);
1704     new_block = g_malloc0(sizeof(*new_block));
1705     new_block->mr = mr;
1706     new_block->resized = resized;
1707     new_block->used_length = size;
1708     new_block->max_length = max_size;
1709     assert(max_size >= size);
1710     new_block->fd = -1;
1711     new_block->page_size = getpagesize();
1712     new_block->host = host;
1713     if (host) {
1714         new_block->flags |= RAM_PREALLOC;
1715     }
1716     if (resizeable) {
1717         new_block->flags |= RAM_RESIZEABLE;
1718     }
1719     ram_block_add(new_block, &local_err);
1720     if (local_err) {
1721         g_free(new_block);
1722         error_propagate(errp, local_err);
1723         return NULL;
1724     }
1725     return new_block;
1726 }
1727
1728 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1729                                    MemoryRegion *mr, Error **errp)
1730 {
1731     return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1732 }
1733
1734 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1735 {
1736     return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1737 }
1738
1739 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1740                                      void (*resized)(const char*,
1741                                                      uint64_t length,
1742                                                      void *host),
1743                                      MemoryRegion *mr, Error **errp)
1744 {
1745     return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1746 }
1747
1748 static void reclaim_ramblock(RAMBlock *block)
1749 {
1750     if (block->flags & RAM_PREALLOC) {
1751         ;
1752     } else if (xen_enabled()) {
1753         xen_invalidate_map_cache_entry(block->host);
1754 #ifndef _WIN32
1755     } else if (block->fd >= 0) {
1756         qemu_ram_munmap(block->host, block->max_length);
1757         close(block->fd);
1758 #endif
1759     } else {
1760         qemu_anon_ram_free(block->host, block->max_length);
1761     }
1762     g_free(block);
1763 }
1764
1765 void qemu_ram_free(RAMBlock *block)
1766 {
1767     if (!block) {
1768         return;
1769     }
1770
1771     qemu_mutex_lock_ramlist();
1772     QLIST_REMOVE_RCU(block, next);
1773     ram_list.mru_block = NULL;
1774     /* Write list before version */
1775     smp_wmb();
1776     ram_list.version++;
1777     call_rcu(block, reclaim_ramblock, rcu);
1778     qemu_mutex_unlock_ramlist();
1779 }
1780
1781 #ifndef _WIN32
1782 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1783 {
1784     RAMBlock *block;
1785     ram_addr_t offset;
1786     int flags;
1787     void *area, *vaddr;
1788
1789     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1790         offset = addr - block->offset;
1791         if (offset < block->max_length) {
1792             vaddr = ramblock_ptr(block, offset);
1793             if (block->flags & RAM_PREALLOC) {
1794                 ;
1795             } else if (xen_enabled()) {
1796                 abort();
1797             } else {
1798                 flags = MAP_FIXED;
1799                 if (block->fd >= 0) {
1800                     flags |= (block->flags & RAM_SHARED ?
1801                               MAP_SHARED : MAP_PRIVATE);
1802                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1803                                 flags, block->fd, offset);
1804                 } else {
1805                     /*
1806                      * Remap needs to match alloc.  Accelerators that
1807                      * set phys_mem_alloc never remap.  If they did,
1808                      * we'd need a remap hook here.
1809                      */
1810                     assert(phys_mem_alloc == qemu_anon_ram_alloc);
1811
1812                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1813                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1814                                 flags, -1, 0);
1815                 }
1816                 if (area != vaddr) {
1817                     fprintf(stderr, "Could not remap addr: "
1818                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1819                             length, addr);
1820                     exit(1);
1821                 }
1822                 memory_try_enable_merging(vaddr, length);
1823                 qemu_ram_setup_dump(vaddr, length);
1824             }
1825         }
1826     }
1827 }
1828 #endif /* !_WIN32 */
1829
1830 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1831  * This should not be used for general purpose DMA.  Use address_space_map
1832  * or address_space_rw instead. For local memory (e.g. video ram) that the
1833  * device owns, use memory_region_get_ram_ptr.
1834  *
1835  * Called within RCU critical section.
1836  */
1837 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1838 {
1839     RAMBlock *block = ram_block;
1840
1841     if (block == NULL) {
1842         block = qemu_get_ram_block(addr);
1843         addr -= block->offset;
1844     }
1845
1846     if (xen_enabled() && block->host == NULL) {
1847         /* We need to check if the requested address is in the RAM
1848          * because we don't want to map the entire memory in QEMU.
1849          * In that case just map until the end of the page.
1850          */
1851         if (block->offset == 0) {
1852             return xen_map_cache(addr, 0, 0);
1853         }
1854
1855         block->host = xen_map_cache(block->offset, block->max_length, 1);
1856     }
1857     return ramblock_ptr(block, addr);
1858 }
1859
1860 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1861  * but takes a size argument.
1862  *
1863  * Called within RCU critical section.
1864  */
1865 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1866                                  hwaddr *size)
1867 {
1868     RAMBlock *block = ram_block;
1869     if (*size == 0) {
1870         return NULL;
1871     }
1872
1873     if (block == NULL) {
1874         block = qemu_get_ram_block(addr);
1875         addr -= block->offset;
1876     }
1877     *size = MIN(*size, block->max_length - addr);
1878
1879     if (xen_enabled() && block->host == NULL) {
1880         /* We need to check if the requested address is in the RAM
1881          * because we don't want to map the entire memory in QEMU.
1882          * In that case just map the requested area.
1883          */
1884         if (block->offset == 0) {
1885             return xen_map_cache(addr, *size, 1);
1886         }
1887
1888         block->host = xen_map_cache(block->offset, block->max_length, 1);
1889     }
1890
1891     return ramblock_ptr(block, addr);
1892 }
1893
1894 /*
1895  * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1896  * in that RAMBlock.
1897  *
1898  * ptr: Host pointer to look up
1899  * round_offset: If true round the result offset down to a page boundary
1900  * *ram_addr: set to result ram_addr
1901  * *offset: set to result offset within the RAMBlock
1902  *
1903  * Returns: RAMBlock (or NULL if not found)
1904  *
1905  * By the time this function returns, the returned pointer is not protected
1906  * by RCU anymore.  If the caller is not within an RCU critical section and
1907  * does not hold the iothread lock, it must have other means of protecting the
1908  * pointer, such as a reference to the region that includes the incoming
1909  * ram_addr_t.
1910  */
1911 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1912                                    ram_addr_t *offset)
1913 {
1914     RAMBlock *block;
1915     uint8_t *host = ptr;
1916
1917     if (xen_enabled()) {
1918         ram_addr_t ram_addr;
1919         rcu_read_lock();
1920         ram_addr = xen_ram_addr_from_mapcache(ptr);
1921         block = qemu_get_ram_block(ram_addr);
1922         if (block) {
1923             *offset = ram_addr - block->offset;
1924         }
1925         rcu_read_unlock();
1926         return block;
1927     }
1928
1929     rcu_read_lock();
1930     block = atomic_rcu_read(&ram_list.mru_block);
1931     if (block && block->host && host - block->host < block->max_length) {
1932         goto found;
1933     }
1934
1935     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1936         /* This case append when the block is not mapped. */
1937         if (block->host == NULL) {
1938             continue;
1939         }
1940         if (host - block->host < block->max_length) {
1941             goto found;
1942         }
1943     }
1944
1945     rcu_read_unlock();
1946     return NULL;
1947
1948 found:
1949     *offset = (host - block->host);
1950     if (round_offset) {
1951         *offset &= TARGET_PAGE_MASK;
1952     }
1953     rcu_read_unlock();
1954     return block;
1955 }
1956
1957 /*
1958  * Finds the named RAMBlock
1959  *
1960  * name: The name of RAMBlock to find
1961  *
1962  * Returns: RAMBlock (or NULL if not found)
1963  */
1964 RAMBlock *qemu_ram_block_by_name(const char *name)
1965 {
1966     RAMBlock *block;
1967
1968     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1969         if (!strcmp(name, block->idstr)) {
1970             return block;
1971         }
1972     }
1973
1974     return NULL;
1975 }
1976
1977 /* Some of the softmmu routines need to translate from a host pointer
1978    (typically a TLB entry) back to a ram offset.  */
1979 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1980 {
1981     RAMBlock *block;
1982     ram_addr_t offset;
1983
1984     block = qemu_ram_block_from_host(ptr, false, &offset);
1985     if (!block) {
1986         return RAM_ADDR_INVALID;
1987     }
1988
1989     return block->offset + offset;
1990 }
1991
1992 /* Called within RCU critical section.  */
1993 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1994                                uint64_t val, unsigned size)
1995 {
1996     bool locked = false;
1997
1998     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1999         locked = true;
2000         tb_lock();
2001         tb_invalidate_phys_page_fast(ram_addr, size);
2002     }
2003     switch (size) {
2004     case 1:
2005         stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2006         break;
2007     case 2:
2008         stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2009         break;
2010     case 4:
2011         stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2012         break;
2013     default:
2014         abort();
2015     }
2016
2017     if (locked) {
2018         tb_unlock();
2019     }
2020
2021     /* Set both VGA and migration bits for simplicity and to remove
2022      * the notdirty callback faster.
2023      */
2024     cpu_physical_memory_set_dirty_range(ram_addr, size,
2025                                         DIRTY_CLIENTS_NOCODE);
2026     /* we remove the notdirty callback only if the code has been
2027        flushed */
2028     if (!cpu_physical_memory_is_clean(ram_addr)) {
2029         tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2030     }
2031 }
2032
/* .valid.accepts hook for notdirty_mem_ops: only writes are accepted;
 * the address and size are not looked at.
 */
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}
2038
/* Ops installed on io_mem_notdirty by io_mem_init().  There is no read
 * callback; notdirty_mem_accepts() rejects everything except writes.
 */
static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2044
/* Generate a debug exception if a watchpoint has been hit.
 *
 * offset: offset of the access within the page of cpu->mem_io_vaddr
 * len:    length of the access in bytes
 * attrs:  transaction attributes, recorded in the matching watchpoint
 * flags:  BP_MEM_READ or BP_MEM_WRITE, matched against wp->flags
 *
 * Operates on current_cpu.  When a watchpoint fires, this function does
 * not return to its caller: it leaves through cpu_loop_exit() or
 * cpu_loop_exit_noexc().
 */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
{
    CPUState *cpu = current_cpu;
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    uint32_t cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    /* Rebuild the full virtual address of the access. */
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            wp->hitattrs = attrs;
            if (!cpu->watchpoint_hit) {
                /* BP_CPU watchpoints get a veto from the CPU class; a
                 * rejected hit is cleared and the scan continues. */
                if (wp->flags & BP_CPU &&
                    !cc->debug_check_watchpoint(cpu, wp)) {
                    wp->flags &= ~BP_WATCHPOINT_HIT;
                    continue;
                }
                cpu->watchpoint_hit = wp;

                /* The tb_lock will be reset when cpu_loop_exit or
                 * cpu_loop_exit_noexc longjmp back into the cpu_exec
                 * main loop.
                 */
                tb_lock();
                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    /* Generate a new TB for the current CPU state before
                     * leaving; the check is then re-entered with
                     * cpu->watchpoint_hit set (see top of function). */
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_loop_exit_noexc(cpu);
                }
            }
        } else {
            /* No match for this access: drop any stale hit marker. */
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
2102
2103 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2104    so these check for a hit then pass through to the normal out-of-line
2105    phys routines.  */
2106 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2107                                   unsigned size, MemTxAttrs attrs)
2108 {
2109     MemTxResult res;
2110     uint64_t data;
2111     int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2112     AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2113
2114     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2115     switch (size) {
2116     case 1:
2117         data = address_space_ldub(as, addr, attrs, &res);
2118         break;
2119     case 2:
2120         data = address_space_lduw(as, addr, attrs, &res);
2121         break;
2122     case 4:
2123         data = address_space_ldl(as, addr, attrs, &res);
2124         break;
2125     default: abort();
2126     }
2127     *pdata = data;
2128     return res;
2129 }
2130
2131 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2132                                    uint64_t val, unsigned size,
2133                                    MemTxAttrs attrs)
2134 {
2135     MemTxResult res;
2136     int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2137     AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2138
2139     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2140     switch (size) {
2141     case 1:
2142         address_space_stb(as, addr, val, attrs, &res);
2143         break;
2144     case 2:
2145         address_space_stw(as, addr, val, attrs, &res);
2146         break;
2147     case 4:
2148         address_space_stl(as, addr, val, attrs, &res);
2149         break;
2150     default: abort();
2151     }
2152     return res;
2153 }
2154
/* Ops installed on io_mem_watch by io_mem_init(); both directions go
 * through check_watchpoint() before the real access is performed.
 */
static const MemoryRegionOps watch_mem_ops = {
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2160
2161 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2162                                 unsigned len, MemTxAttrs attrs)
2163 {
2164     subpage_t *subpage = opaque;
2165     uint8_t buf[8];
2166     MemTxResult res;
2167
2168 #if defined(DEBUG_SUBPAGE)
2169     printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2170            subpage, len, addr);
2171 #endif
2172     res = address_space_read(subpage->as, addr + subpage->base,
2173                              attrs, buf, len);
2174     if (res) {
2175         return res;
2176     }
2177     switch (len) {
2178     case 1:
2179         *data = ldub_p(buf);
2180         return MEMTX_OK;
2181     case 2:
2182         *data = lduw_p(buf);
2183         return MEMTX_OK;
2184     case 4:
2185         *data = ldl_p(buf);
2186         return MEMTX_OK;
2187     case 8:
2188         *data = ldq_p(buf);
2189         return MEMTX_OK;
2190     default:
2191         abort();
2192     }
2193 }
2194
2195 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2196                                  uint64_t value, unsigned len, MemTxAttrs attrs)
2197 {
2198     subpage_t *subpage = opaque;
2199     uint8_t buf[8];
2200
2201 #if defined(DEBUG_SUBPAGE)
2202     printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2203            " value %"PRIx64"\n",
2204            __func__, subpage, len, addr, value);
2205 #endif
2206     switch (len) {
2207     case 1:
2208         stb_p(buf, value);
2209         break;
2210     case 2:
2211         stw_p(buf, value);
2212         break;
2213     case 4:
2214         stl_p(buf, value);
2215         break;
2216     case 8:
2217         stq_p(buf, value);
2218         break;
2219     default:
2220         abort();
2221     }
2222     return address_space_write(subpage->as, addr + subpage->base,
2223                                attrs, buf, len);
2224 }
2225
2226 static bool subpage_accepts(void *opaque, hwaddr addr,
2227                             unsigned len, bool is_write)
2228 {
2229     subpage_t *subpage = opaque;
2230 #if defined(DEBUG_SUBPAGE)
2231     printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2232            __func__, subpage, is_write ? 'w' : 'r', len, addr);
2233 #endif
2234
2235     return address_space_access_valid(subpage->as, addr + subpage->base,
2236                                       len, is_write);
2237 }
2238
/* Ops used for the iomem region of every subpage (see subpage_init()).
 * Accesses of 1 to 8 bytes are declared both valid and implemented;
 * subpage_accepts() has the final say on validity.
 */
static const MemoryRegionOps subpage_ops = {
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2249
2250 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2251                              uint16_t section)
2252 {
2253     int idx, eidx;
2254
2255     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2256         return -1;
2257     idx = SUBPAGE_IDX(start);
2258     eidx = SUBPAGE_IDX(end);
2259 #if defined(DEBUG_SUBPAGE)
2260     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2261            __func__, mmio, start, end, idx, eidx, section);
2262 #endif
2263     for (; idx <= eidx; idx++) {
2264         mmio->sub_section[idx] = section;
2265     }
2266
2267     return 0;
2268 }
2269
2270 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2271 {
2272     subpage_t *mmio;
2273
2274     mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2275     mmio->as = as;
2276     mmio->base = base;
2277     memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2278                           NULL, TARGET_PAGE_SIZE);
2279     mmio->iomem.subpage = true;
2280 #if defined(DEBUG_SUBPAGE)
2281     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2282            mmio, base, TARGET_PAGE_SIZE);
2283 #endif
2284     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2285
2286     return mmio;
2287 }
2288
2289 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2290                               MemoryRegion *mr)
2291 {
2292     assert(as);
2293     MemoryRegionSection section = {
2294         .address_space = as,
2295         .mr = mr,
2296         .offset_within_address_space = 0,
2297         .offset_within_region = 0,
2298         .size = int128_2_64(),
2299     };
2300
2301     return phys_section_add(map, &section);
2302 }
2303
2304 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2305 {
2306     int asidx = cpu_asidx_from_attrs(cpu, attrs);
2307     CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2308     AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2309     MemoryRegionSection *sections = d->map.sections;
2310
2311     return sections[index & ~TARGET_PAGE_MASK].mr;
2312 }
2313
2314 static void io_mem_init(void)
2315 {
2316     memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2317     memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2318                           NULL, UINT64_MAX);
2319     memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2320                           NULL, UINT64_MAX);
2321     memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2322                           NULL, UINT64_MAX);
2323 }
2324
/* .begin hook of the dispatch listener: start building a fresh
 * AddressSpaceDispatch for the owning AddressSpace and park it in
 * as->next_dispatch (mem_commit() publishes it later).
 */
static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    /* The fixed sections must be added in exactly this order so that the
     * indices returned match the PHYS_SECTION_* constants. */
    n = dummy_section(&d->map, as, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&d->map, as, &io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&d->map, as, &io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&d->map, as, &io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);

    /* Start with an empty page table root. */
    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
    as->next_dispatch = d;
}
2344
/* Release a dispatch structure and its section map.  Used as the
 * call_rcu() callback when a dispatch is replaced or destroyed.
 */
static void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d);
}
2350
2351 static void mem_commit(MemoryListener *listener)
2352 {
2353     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2354     AddressSpaceDispatch *cur = as->dispatch;
2355     AddressSpaceDispatch *next = as->next_dispatch;
2356
2357     phys_page_compact_all(next, next->map.nodes_nb);
2358
2359     atomic_rcu_set(&as->dispatch, next);
2360     if (cur) {
2361         call_rcu(cur, address_space_dispatch_free, rcu);
2362     }
2363 }
2364
2365 static void tcg_commit(MemoryListener *listener)
2366 {
2367     CPUAddressSpace *cpuas;
2368     AddressSpaceDispatch *d;
2369
2370     /* since each CPU stores ram addresses in its TLB cache, we must
2371        reset the modified entries */
2372     cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2373     cpu_reloading_memory_map();
2374     /* The CPU and TLB are protected by the iothread lock.
2375      * We reload the dispatch pointer now because cpu_reloading_memory_map()
2376      * may have split the RCU critical section.
2377      */
2378     d = atomic_rcu_read(&cpuas->as->dispatch);
2379     cpuas->memory_dispatch = d;
2380     tlb_flush(cpuas->cpu, 1);
2381 }
2382
/* Reset @as's dispatch pointer and register the listener that rebuilds
 * the dispatch structures (mem_begin / mem_add / mem_commit) for it.
 */
void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    /* region_add and region_nop are both handled by mem_add. */
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}
2395
/* Unregister the dispatch listener set up by
 * address_space_init_dispatch().  The dispatch itself is not freed here.
 */
void address_space_unregister(AddressSpace *as)
{
    memory_listener_unregister(&as->dispatch_listener);
}
2400
2401 void address_space_destroy_dispatch(AddressSpace *as)
2402 {
2403     AddressSpaceDispatch *d = as->dispatch;
2404
2405     atomic_rcu_set(&as->dispatch, NULL);
2406     if (d) {
2407         call_rcu(d, address_space_dispatch_free, rcu);
2408     }
2409 }
2410
2411 static void memory_map_init(void)
2412 {
2413     system_memory = g_malloc(sizeof(*system_memory));
2414
2415     memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2416     address_space_init(&address_space_memory, system_memory, "memory");
2417
2418     system_io = g_malloc(sizeof(*system_io));
2419     memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2420                           65536);
2421     address_space_init(&address_space_io, system_io, "I/O");
2422 }
2423
/* Return the root "system" memory region created by memory_map_init(). */
MemoryRegion *get_system_memory(void)
{
    return system_memory;
}
2428
/* Return the root "io" region created by memory_map_init(). */
MemoryRegion *get_system_io(void)
{
    return system_io;
}
2433
2434 #endif /* !defined(CONFIG_USER_ONLY) */
2435
2436 /* physical memory access (slow version, mainly for debug) */
2437 #if defined(CONFIG_USER_ONLY)
2438 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2439                         uint8_t *buf, int len, int is_write)
2440 {
2441     int l, flags;
2442     target_ulong page;
2443     void * p;
2444
2445     while (len > 0) {
2446         page = addr & TARGET_PAGE_MASK;
2447         l = (page + TARGET_PAGE_SIZE) - addr;
2448         if (l > len)
2449             l = len;
2450         flags = page_get_flags(page);
2451         if (!(flags & PAGE_VALID))
2452             return -1;
2453         if (is_write) {
2454             if (!(flags & PAGE_WRITE))
2455                 return -1;
2456             /* XXX: this code should not depend on lock_user */
2457             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2458                 return -1;
2459             memcpy(p, buf, l);
2460             unlock_user(p, addr, l);
2461         } else {
2462             if (!(flags & PAGE_READ))
2463                 return -1;
2464             /* XXX: this code should not depend on lock_user */
2465             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2466                 return -1;
2467             memcpy(buf, p, l);
2468             unlock_user(p, addr, 0);
2469         }
2470         len -= l;
2471         buf += l;
2472         addr += l;
2473     }
2474     return 0;
2475 }
2476
2477 #else
2478
2479 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2480                                      hwaddr length)
2481 {
2482     uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2483     addr += memory_region_get_ram_addr(mr);
2484
2485     /* No early return if dirty_log_mask is or becomes 0, because
2486      * cpu_physical_memory_set_dirty_range will still call
2487      * xen_modified_memory.
2488      */
2489     if (dirty_log_mask) {
2490         dirty_log_mask =
2491             cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2492     }
2493     if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2494         tb_lock();
2495         tb_invalidate_phys_range(addr, addr + length);
2496         tb_unlock();
2497         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2498     }
2499     cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2500 }
2501
2502 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2503 {
2504     unsigned access_size_max = mr->ops->valid.max_access_size;
2505
2506     /* Regions are assumed to support 1-4 byte accesses unless
2507        otherwise specified.  */
2508     if (access_size_max == 0) {
2509         access_size_max = 4;
2510     }
2511
2512     /* Bound the maximum access by the alignment of the address.  */
2513     if (!mr->ops->impl.unaligned) {
2514         unsigned align_size_max = addr & -addr;
2515         if (align_size_max != 0 && align_size_max < access_size_max) {
2516             access_size_max = align_size_max;
2517         }
2518     }
2519
2520     /* Don't attempt accesses larger than the maximum.  */
2521     if (l > access_size_max) {
2522         l = access_size_max;
2523     }
2524     l = pow2floor(l);
2525
2526     return l;
2527 }
2528
2529 static bool prepare_mmio_access(MemoryRegion *mr)
2530 {
2531     bool unlocked = !qemu_mutex_iothread_locked();
2532     bool release_lock = false;
2533
2534     if (unlocked && mr->global_locking) {
2535         qemu_mutex_lock_iothread();
2536         unlocked = false;
2537         release_lock = true;
2538     }
2539     if (mr->flush_coalesced_mmio) {
2540         if (unlocked) {
2541             qemu_mutex_lock_iothread();
2542         }
2543         qemu_flush_coalesced_mmio_buffer();
2544         if (unlocked) {
2545             qemu_mutex_unlock_iothread();
2546         }
2547     }
2548
2549     return release_lock;
2550 }
2551
2552 /* Called within RCU critical section.  */
2553 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2554                                                 MemTxAttrs attrs,
2555                                                 const uint8_t *buf,
2556                                                 int len, hwaddr addr1,
2557                                                 hwaddr l, MemoryRegion *mr)
2558 {
2559     uint8_t *ptr;
2560     uint64_t val;
2561     MemTxResult result = MEMTX_OK;
2562     bool release_lock = false;
2563
2564     for (;;) {
2565         if (!memory_access_is_direct(mr, true)) {
2566             release_lock |= prepare_mmio_access(mr);
2567             l = memory_access_size(mr, l, addr1);
2568             /* XXX: could force current_cpu to NULL to avoid
2569                potential bugs */
2570             switch (l) {
2571             case 8:
2572                 /* 64 bit write access */
2573                 val = ldq_p(buf);
2574                 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2575                                                        attrs);
2576                 break;
2577             case 4:
2578                 /* 32 bit write access */
2579                 val = ldl_p(buf);
2580                 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2581                                                        attrs);
2582                 break;
2583             case 2:
2584                 /* 16 bit write access */
2585                 val = lduw_p(buf);
2586                 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2587                                                        attrs);
2588                 break;
2589             case 1:
2590                 /* 8 bit write access */
2591                 val = ldub_p(buf);
2592                 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2593                                                        attrs);
2594                 break;
2595             default:
2596                 abort();
2597             }
2598         } else {
2599             /* RAM case */
2600             ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2601             memcpy(ptr, buf, l);
2602             invalidate_and_set_dirty(mr, addr1, l);
2603         }
2604
2605         if (release_lock) {
2606             qemu_mutex_unlock_iothread();
2607             release_lock = false;
2608         }
2609
2610         len -= l;
2611         buf += l;
2612         addr += l;
2613
2614         if (!len) {
2615             break;
2616         }
2617
2618         l = len;
2619         mr = address_space_translate(as, addr, &addr1, &l, true);
2620     }
2621
2622     return result;
2623 }
2624
2625 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2626                                 const uint8_t *buf, int len)
2627 {
2628     hwaddr l;
2629     hwaddr addr1;
2630     MemoryRegion *mr;
2631     MemTxResult result = MEMTX_OK;
2632
2633     if (len > 0) {
2634         rcu_read_lock();
2635         l = len;
2636         mr = address_space_translate(as, addr, &addr1, &l, true);
2637         result = address_space_write_continue(as, addr, attrs, buf, len,
2638                                               addr1, l, mr);
2639         rcu_read_unlock();
2640     }
2641
2642     return result;
2643 }
2644
2645 /* Called within RCU critical section.  */
2646 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2647                                         MemTxAttrs attrs, uint8_t *buf,
2648                                         int len, hwaddr addr1, hwaddr l,
2649                                         MemoryRegion *mr)
2650 {
2651     uint8_t *ptr;
2652     uint64_t val;
2653     MemTxResult result = MEMTX_OK;
2654     bool release_lock = false;
2655
2656     for (;;) {
2657         if (!memory_access_is_direct(mr, false)) {
2658             /* I/O case */
2659             release_lock |= prepare_mmio_access(mr);
2660             l = memory_access_size(mr, l, addr1);
2661             switch (l) {
2662             case 8:
2663                 /* 64 bit read access */
2664                 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2665                                                       attrs);
2666                 stq_p(buf, val);
2667                 break;
2668             case 4:
2669                 /* 32 bit read access */
2670                 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2671                                                       attrs);
2672                 stl_p(buf, val);
2673                 break;
2674             case 2:
2675                 /* 16 bit read access */
2676                 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2677                                                       attrs);
2678                 stw_p(buf, val);
2679                 break;
2680             case 1:
2681                 /* 8 bit read access */
2682                 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2683                                                       attrs);
2684                 stb_p(buf, val);
2685                 break;
2686             default:
2687                 abort();
2688             }
2689         } else {
2690             /* RAM case */
2691             ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2692             memcpy(buf, ptr, l);
2693         }
2694
2695         if (release_lock) {
2696             qemu_mutex_unlock_iothread();
2697             release_lock = false;
2698         }
2699
2700         len -= l;
2701         buf += l;
2702         addr += l;
2703
2704         if (!len) {
2705             break;
2706         }
2707
2708         l = len;
2709         mr = address_space_translate(as, addr, &addr1, &l, false);
2710     }
2711
2712     return result;
2713 }
2714
2715 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2716                                     MemTxAttrs attrs, uint8_t *buf, int len)
2717 {
2718     hwaddr l;
2719     hwaddr addr1;
2720     MemoryRegion *mr;
2721     MemTxResult result = MEMTX_OK;
2722
2723     if (len > 0) {
2724         rcu_read_lock();
2725         l = len;
2726         mr = address_space_translate(as, addr, &addr1, &l, false);
2727         result = address_space_read_continue(as, addr, attrs, buf, len,
2728                                              addr1, l, mr);
2729         rcu_read_unlock();
2730     }
2731
2732     return result;
2733 }
2734
2735 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2736                              uint8_t *buf, int len, bool is_write)
2737 {
2738     if (is_write) {
2739         return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2740     } else {
2741         return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2742     }
2743 }
2744
/* Convenience wrapper: address_space_rw() on address_space_memory with
 * unspecified transaction attributes.  The MemTxResult is discarded.
 */
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}
2751
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM and mark it dirty */
    FLUSH_CACHE,    /* only flush the host icache for the range */
};
2756
2757 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2758     hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2759 {
2760     hwaddr l;
2761     uint8_t *ptr;
2762     hwaddr addr1;
2763     MemoryRegion *mr;
2764
2765     rcu_read_lock();
2766     while (len > 0) {
2767         l = len;
2768         mr = address_space_translate(as, addr, &addr1, &l, true);
2769
2770         if (!(memory_region_is_ram(mr) ||
2771               memory_region_is_romd(mr))) {
2772             l = memory_access_size(mr, l, addr1);
2773         } else {
2774             /* ROM/RAM case */
2775             ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2776             switch (type) {
2777             case WRITE_DATA:
2778                 memcpy(ptr, buf, l);
2779                 invalidate_and_set_dirty(mr, addr1, l);
2780                 break;
2781             case FLUSH_CACHE:
2782                 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2783                 break;
2784             }
2785         }
2786         len -= l;
2787         buf += l;
2788         addr += l;
2789     }
2790     rcu_read_unlock();
2791 }
2792
/* used for ROM loading : can write in RAM and ROM
 *
 * Copies @len bytes from @buf to @addr in @as; parts of the range that
 * are neither RAM nor a ROM device are skipped by the internal helper.
 */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}
2799
2800 void cpu_flush_icache_range(hwaddr start, int len)
2801 {
2802     /*
2803      * This function should do the same thing as an icache flush that was
2804      * triggered from within the guest. For TCG we are always cache coherent,
2805      * so there is no need to flush anything. For KVM / Xen we need to flush
2806      * the host's instruction cache at least.
2807      */
2808     if (tcg_enabled()) {
2809         return;
2810     }
2811
2812     cpu_physical_memory_write_rom_internal(&address_space_memory,
2813                                            start, NULL, len, FLUSH_CACHE);
2814 }
2815
/*
 * Bookkeeping for the bounce buffer that address_space_map() hands out when
 * the target region cannot be accessed directly.
 */
typedef struct {
    /* Region that address_space_map() took a reference on for this buffer. */
    MemoryRegion *mr;
    /* Host allocation returned to the mapper; reset to NULL on unmap. */
    void *buffer;
    /* Address that was passed to address_space_map(). */
    hwaddr addr;
    /* Number of bytes the buffer covers. */
    hwaddr len;
    /* Claimed with atomic_xchg() on map, cleared with atomic_mb_set() on unmap. */
    bool in_use;
} BounceBuffer;

/* The single bounce buffer instance shared by all mappers in this file. */
static BounceBuffer bounce;
2825
/*
 * One registered waiter: a bottom half that is scheduled by
 * cpu_notify_map_clients_locked().
 */
typedef struct MapClient {
    /* Bottom half to schedule for this client. */
    QEMUBH *bh;
    /* Linkage in map_client_list. */
    QLIST_ENTRY(MapClient) link;
} MapClient;

/* Taken around every access to map_client_list in this file. */
QemuMutex map_client_list_lock;
/* Registered map clients. */
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
2834
2835 static void cpu_unregister_map_client_do(MapClient *client)
2836 {
2837     QLIST_REMOVE(client, link);
2838     g_free(client);
2839 }
2840
2841 static void cpu_notify_map_clients_locked(void)
2842 {
2843     MapClient *client;
2844
2845     while (!QLIST_EMPTY(&map_client_list)) {
2846         client = QLIST_FIRST(&map_client_list);
2847         qemu_bh_schedule(client->bh);
2848         cpu_unregister_map_client_do(client);
2849     }
2850 }
2851
2852 void cpu_register_map_client(QEMUBH *bh)
2853 {
2854     MapClient *client = g_malloc(sizeof(*client));
2855
2856     qemu_mutex_lock(&map_client_list_lock);
2857     client->bh = bh;
2858     QLIST_INSERT_HEAD(&map_client_list, client, link);
2859     if (!atomic_read(&bounce.in_use)) {
2860         cpu_notify_map_clients_locked();
2861     }
2862     qemu_mutex_unlock(&map_client_list_lock);
2863 }
2864
2865 void cpu_exec_init_all(void)
2866 {
2867     qemu_mutex_init(&ram_list.mutex);
2868     /* The data structures we set up here depend on knowing the page size,
2869      * so no more changes can be made after this point.
2870      * In an ideal world, nothing we did before we had finished the
2871      * machine setup would care about the target page size, and we could
2872      * do this much later, rather than requiring board models to state
2873      * up front what their requirements are.
2874      */
2875     finalize_target_page_bits();
2876     io_mem_init();
2877     memory_map_init();
2878     qemu_mutex_init(&map_client_list_lock);
2879 }
2880
2881 void cpu_unregister_map_client(QEMUBH *bh)
2882 {
2883     MapClient *client;
2884
2885     qemu_mutex_lock(&map_client_list_lock);
2886     QLIST_FOREACH(client, &map_client_list, link) {
2887         if (client->bh == bh) {
2888             cpu_unregister_map_client_do(client);
2889             break;
2890         }
2891     }
2892     qemu_mutex_unlock(&map_client_list_lock);
2893 }
2894
2895 static void cpu_notify_map_clients(void)
2896 {
2897     qemu_mutex_lock(&map_client_list_lock);
2898     cpu_notify_map_clients_locked();
2899     qemu_mutex_unlock(&map_client_list_lock);
2900 }
2901
2902 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2903 {
2904     MemoryRegion *mr;
2905     hwaddr l, xlat;
2906
2907     rcu_read_lock();
2908     while (len > 0) {
2909         l = len;
2910         mr = address_space_translate(as, addr, &xlat, &l, is_write);
2911         if (!memory_access_is_direct(mr, is_write)) {
2912             l = memory_access_size(mr, l, addr);
2913             if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2914                 return false;
2915             }
2916         }
2917
2918         len -= l;
2919         addr += l;
2920     }
2921     rcu_read_unlock();
2922     return true;
2923 }
2924
2925 /* Map a physical memory region into a host virtual address.
2926  * May map a subset of the requested range, given by and returned in *plen.
2927  * May return NULL if resources needed to perform the mapping are exhausted.
2928  * Use only for reads OR writes - not for read-modify-write operations.
2929  * Use cpu_register_map_client() to know when retrying the map operation is
2930  * likely to succeed.
2931  */
2932 void *address_space_map(AddressSpace *as,
2933                         hwaddr addr,
2934                         hwaddr *plen,
2935                         bool is_write)
2936 {
2937     hwaddr len = *plen;
2938     hwaddr done = 0;
2939     hwaddr l, xlat, base;
2940     MemoryRegion *mr, *this_mr;
2941     void *ptr;
2942
2943     if (len == 0) {
2944         return NULL;
2945     }
2946
2947     l = len;
2948     rcu_read_lock();
2949     mr = address_space_translate(as, addr, &xlat, &l, is_write);
2950
2951     if (!memory_access_is_direct(mr, is_write)) {
2952         if (atomic_xchg(&bounce.in_use, true)) {
2953             rcu_read_unlock();
2954             return NULL;
2955         }
2956         /* Avoid unbounded allocations */
2957         l = MIN(l, TARGET_PAGE_SIZE);
2958         bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2959         bounce.addr = addr;
2960         bounce.len = l;
2961
2962         memory_region_ref(mr);
2963         bounce.mr = mr;
2964         if (!is_write) {
2965             address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2966                                bounce.buffer, l);
2967         }
2968
2969         rcu_read_unlock();
2970         *plen = l;
2971         return bounce.buffer;
2972     }
2973
2974     base = xlat;
2975
2976     for (;;) {
2977         len -= l;
2978         addr += l;
2979         done += l;
2980         if (len == 0) {
2981             break;
2982         }
2983
2984         l = len;
2985         this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2986         if (this_mr != mr || xlat != base + done) {
2987             break;
2988         }
2989     }
2990
2991     memory_region_ref(mr);
2992     *plen = done;
2993     ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2994     rcu_read_unlock();
2995
2996     return ptr;
2997 }
2998
2999 /* Unmaps a memory region previously mapped by address_space_map().
3000  * Will also mark the memory as dirty if is_write == 1.  access_len gives
3001  * the amount of memory that was actually read or written by the caller.
3002  */
3003 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3004                          int is_write, hwaddr access_len)
3005 {
3006     if (buffer != bounce.buffer) {
3007         MemoryRegion *mr;
3008         ram_addr_t addr1;
3009
3010         mr = memory_region_from_host(buffer, &addr1);
3011         assert(mr != NULL);
3012         if (is_write) {
3013             invalidate_and_set_dirty(mr, addr1, access_len);
3014         }
3015         if (xen_enabled()) {
3016             xen_invalidate_map_cache_entry(buffer);
3017         }
3018         memory_region_unref(mr);
3019         return;
3020     }
3021     if (is_write) {
3022         address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3023                             bounce.buffer, access_len);
3024     }
3025     qemu_vfree(bounce.buffer);
3026     bounce.buffer = NULL;
3027     memory_region_unref(bounce.mr);
3028     atomic_mb_set(&bounce.in_use, false);
3029     cpu_notify_map_clients();
3030 }
3031
3032 void *cpu_physical_memory_map(hwaddr addr,
3033                               hwaddr *plen,
3034                               int is_write)
3035 {
3036     return address_space_map(&address_space_memory, addr, plen, is_write);
3037 }
3038
3039 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3040                                int is_write, hwaddr access_len)
3041 {
3042     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3043 }
3044
3045 /* warning: addr must be aligned */
3046 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3047                                                   MemTxAttrs attrs,
3048                                                   MemTxResult *result,
3049                                                   enum device_endian endian)
3050 {
3051     uint8_t *ptr;
3052     uint64_t val;
3053     MemoryRegion *mr;
3054     hwaddr l = 4;
3055     hwaddr addr1;
3056     MemTxResult r;
3057     bool release_lock = false;
3058
3059     rcu_read_lock();
3060     mr = address_space_translate(as, addr, &addr1, &l, false);
3061     if (l < 4 || !memory_access_is_direct(mr, false)) {
3062         release_lock |= prepare_mmio_access(mr);
3063
3064         /* I/O case */
3065         r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3066 #if defined(TARGET_WORDS_BIGENDIAN)
3067         if (endian == DEVICE_LITTLE_ENDIAN) {
3068             val = bswap32(val);
3069         }
3070 #else
3071         if (endian == DEVICE_BIG_ENDIAN) {
3072             val = bswap32(val);
3073         }
3074 #endif
3075     } else {
3076         /* RAM case */
3077         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3078         switch (endian) {
3079         case DEVICE_LITTLE_ENDIAN:
3080             val = ldl_le_p(ptr);
3081             break;
3082         case DEVICE_BIG_ENDIAN:
3083             val = ldl_be_p(ptr);
3084             break;
3085         default:
3086             val = ldl_p(ptr);
3087             break;
3088         }
3089         r = MEMTX_OK;
3090     }
3091     if (result) {
3092         *result = r;
3093     }
3094     if (release_lock) {
3095         qemu_mutex_unlock_iothread();
3096     }
3097     rcu_read_unlock();
3098     return val;
3099 }
3100
3101 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3102                            MemTxAttrs attrs, MemTxResult *result)
3103 {
3104     return address_space_ldl_internal(as, addr, attrs, result,
3105                                       DEVICE_NATIVE_ENDIAN);
3106 }
3107
3108 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3109                               MemTxAttrs attrs, MemTxResult *result)
3110 {
3111     return address_space_ldl_internal(as, addr, attrs, result,
3112                                       DEVICE_LITTLE_ENDIAN);
3113 }
3114
3115 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3116                               MemTxAttrs attrs, MemTxResult *result)
3117 {
3118     return address_space_ldl_internal(as, addr, attrs, result,
3119                                       DEVICE_BIG_ENDIAN);
3120 }
3121
3122 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3123 {
3124     return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3125 }
3126
3127 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3128 {
3129     return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3130 }
3131
3132 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3133 {
3134     return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3135 }
3136
3137 /* warning: addr must be aligned */
3138 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3139                                                   MemTxAttrs attrs,
3140                                                   MemTxResult *result,
3141                                                   enum device_endian endian)
3142 {
3143     uint8_t *ptr;
3144     uint64_t val;
3145     MemoryRegion *mr;
3146     hwaddr l = 8;
3147     hwaddr addr1;
3148     MemTxResult r;
3149     bool release_lock = false;
3150
3151     rcu_read_lock();
3152     mr = address_space_translate(as, addr, &addr1, &l,
3153                                  false);
3154     if (l < 8 || !memory_access_is_direct(mr, false)) {
3155         release_lock |= prepare_mmio_access(mr);
3156
3157         /* I/O case */
3158         r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3159 #if defined(TARGET_WORDS_BIGENDIAN)
3160         if (endian == DEVICE_LITTLE_ENDIAN) {
3161             val = bswap64(val);
3162         }
3163 #else
3164         if (endian == DEVICE_BIG_ENDIAN) {
3165             val = bswap64(val);
3166         }
3167 #endif
3168     } else {
3169         /* RAM case */
3170         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3171         switch (endian) {
3172         case DEVICE_LITTLE_ENDIAN:
3173             val = ldq_le_p(ptr);
3174             break;
3175         case DEVICE_BIG_ENDIAN:
3176             val = ldq_be_p(ptr);
3177             break;
3178         default:
3179             val = ldq_p(ptr);
3180             break;
3181         }
3182         r = MEMTX_OK;
3183     }
3184     if (result) {
3185         *result = r;
3186     }
3187     if (release_lock) {
3188         qemu_mutex_unlock_iothread();
3189     }
3190     rcu_read_unlock();
3191     return val;
3192 }
3193
3194 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3195                            MemTxAttrs attrs, MemTxResult *result)
3196 {
3197     return address_space_ldq_internal(as, addr, attrs, result,
3198                                       DEVICE_NATIVE_ENDIAN);
3199 }
3200
3201 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3202                            MemTxAttrs attrs, MemTxResult *result)
3203 {
3204     return address_space_ldq_internal(as, addr, attrs, result,
3205                                       DEVICE_LITTLE_ENDIAN);
3206 }
3207
3208 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3209                            MemTxAttrs attrs, MemTxResult *result)
3210 {
3211     return address_space_ldq_internal(as, addr, attrs, result,
3212                                       DEVICE_BIG_ENDIAN);
3213 }
3214
3215 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3216 {
3217     return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3218 }
3219
3220 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3221 {
3222     return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3223 }
3224
3225 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3226 {
3227     return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3228 }
3229
3230 /* XXX: optimize */
3231 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3232                             MemTxAttrs attrs, MemTxResult *result)
3233 {
3234     uint8_t val;
3235     MemTxResult r;
3236
3237     r = address_space_rw(as, addr, attrs, &val, 1, 0);
3238     if (result) {
3239         *result = r;
3240     }
3241     return val;
3242 }
3243
3244 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3245 {
3246     return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3247 }
3248
3249 /* warning: addr must be aligned */
3250 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3251                                                    hwaddr addr,
3252                                                    MemTxAttrs attrs,
3253                                                    MemTxResult *result,
3254                                                    enum device_endian endian)
3255 {
3256     uint8_t *ptr;
3257     uint64_t val;
3258     MemoryRegion *mr;
3259     hwaddr l = 2;
3260     hwaddr addr1;
3261     MemTxResult r;
3262     bool release_lock = false;
3263
3264     rcu_read_lock();
3265     mr = address_space_translate(as, addr, &addr1, &l,
3266                                  false);
3267     if (l < 2 || !memory_access_is_direct(mr, false)) {
3268         release_lock |= prepare_mmio_access(mr);
3269
3270         /* I/O case */
3271         r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3272 #if defined(TARGET_WORDS_BIGENDIAN)
3273         if (endian == DEVICE_LITTLE_ENDIAN) {
3274             val = bswap16(val);
3275         }
3276 #else
3277         if (endian == DEVICE_BIG_ENDIAN) {
3278             val = bswap16(val);
3279         }
3280 #endif
3281     } else {
3282         /* RAM case */
3283         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3284         switch (endian) {
3285         case DEVICE_LITTLE_ENDIAN:
3286             val = lduw_le_p(ptr);
3287             break;
3288         case DEVICE_BIG_ENDIAN:
3289             val = lduw_be_p(ptr);
3290             break;
3291         default:
3292             val = lduw_p(ptr);
3293             break;
3294         }
3295         r = MEMTX_OK;
3296     }
3297     if (result) {
3298         *result = r;
3299     }
3300     if (release_lock) {
3301         qemu_mutex_unlock_iothread();
3302     }
3303     rcu_read_unlock();
3304     return val;
3305 }
3306
3307 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3308                            MemTxAttrs attrs, MemTxResult *result)
3309 {
3310     return address_space_lduw_internal(as, addr, attrs, result,
3311                                        DEVICE_NATIVE_ENDIAN);
3312 }
3313
3314 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3315                            MemTxAttrs attrs, MemTxResult *result)
3316 {
3317     return address_space_lduw_internal(as, addr, attrs, result,
3318                                        DEVICE_LITTLE_ENDIAN);
3319 }
3320
3321 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3322                            MemTxAttrs attrs, MemTxResult *result)
3323 {
3324     return address_space_lduw_internal(as, addr, attrs, result,
3325                                        DEVICE_BIG_ENDIAN);
3326 }
3327
3328 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3329 {
3330     return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3331 }
3332
3333 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3334 {
3335     return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3336 }
3337
3338 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3339 {
3340     return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3341 }
3342
3343 /* warning: addr must be aligned. The ram page is not masked as dirty
3344    and the code inside is not invalidated. It is useful if the dirty
3345    bits are used to track modified PTEs */
3346 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3347                                 MemTxAttrs attrs, MemTxResult *result)
3348 {
3349     uint8_t *ptr;
3350     MemoryRegion *mr;
3351     hwaddr l = 4;
3352     hwaddr addr1;
3353     MemTxResult r;
3354     uint8_t dirty_log_mask;
3355     bool release_lock = false;
3356
3357     rcu_read_lock();
3358     mr = address_space_translate(as, addr, &addr1, &l,
3359                                  true);
3360     if (l < 4 || !memory_access_is_direct(mr, true)) {
3361         release_lock |= prepare_mmio_access(mr);
3362
3363         r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3364     } else {
3365         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3366         stl_p(ptr, val);
3367
3368         dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3369         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3370         cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3371                                             4, dirty_log_mask);
3372         r = MEMTX_OK;
3373     }
3374     if (result) {
3375         *result = r;
3376     }
3377     if (release_lock) {
3378         qemu_mutex_unlock_iothread();
3379     }
3380     rcu_read_unlock();
3381 }
3382
3383 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3384 {
3385     address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3386 }
3387
3388 /* warning: addr must be aligned */
3389 static inline void address_space_stl_internal(AddressSpace *as,
3390                                               hwaddr addr, uint32_t val,
3391                                               MemTxAttrs attrs,
3392                                               MemTxResult *result,
3393                                               enum device_endian endian)
3394 {
3395     uint8_t *ptr;
3396     MemoryRegion *mr;
3397     hwaddr l = 4;
3398     hwaddr addr1;
3399     MemTxResult r;
3400     bool release_lock = false;
3401
3402     rcu_read_lock();
3403     mr = address_space_translate(as, addr, &addr1, &l,
3404                                  true);
3405     if (l < 4 || !memory_access_is_direct(mr, true)) {
3406         release_lock |= prepare_mmio_access(mr);
3407
3408 #if defined(TARGET_WORDS_BIGENDIAN)
3409         if (endian == DEVICE_LITTLE_ENDIAN) {
3410             val = bswap32(val);
3411         }
3412 #else
3413         if (endian == DEVICE_BIG_ENDIAN) {
3414             val = bswap32(val);
3415         }
3416 #endif
3417         r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3418     } else {
3419         /* RAM case */
3420         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3421         switch (endian) {
3422         case DEVICE_LITTLE_ENDIAN:
3423             stl_le_p(ptr, val);
3424             break;
3425         case DEVICE_BIG_ENDIAN:
3426             stl_be_p(ptr, val);
3427             break;
3428         default:
3429             stl_p(ptr, val);
3430             break;
3431         }
3432         invalidate_and_set_dirty(mr, addr1, 4);
3433         r = MEMTX_OK;
3434     }
3435     if (result) {
3436         *result = r;
3437     }
3438     if (release_lock) {
3439         qemu_mutex_unlock_iothread();
3440     }
3441     rcu_read_unlock();
3442 }
3443
3444 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3445                        MemTxAttrs attrs, MemTxResult *result)
3446 {
3447     address_space_stl_internal(as, addr, val, attrs, result,
3448                                DEVICE_NATIVE_ENDIAN);
3449 }
3450
3451 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3452                        MemTxAttrs attrs, MemTxResult *result)
3453 {
3454     address_space_stl_internal(as, addr, val, attrs, result,
3455                                DEVICE_LITTLE_ENDIAN);
3456 }
3457
3458 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3459                        MemTxAttrs attrs, MemTxResult *result)
3460 {
3461     address_space_stl_internal(as, addr, val, attrs, result,
3462                                DEVICE_BIG_ENDIAN);
3463 }
3464
3465 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3466 {
3467     address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3468 }
3469
3470 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3471 {
3472     address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3473 }
3474
3475 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3476 {
3477     address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3478 }
3479
3480 /* XXX: optimize */
3481 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3482                        MemTxAttrs attrs, MemTxResult *result)
3483 {
3484     uint8_t v = val;
3485     MemTxResult r;
3486
3487     r = address_space_rw(as, addr, attrs, &v, 1, 1);
3488     if (result) {
3489         *result = r;
3490     }
3491 }
3492
3493 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3494 {
3495     address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3496 }
3497
3498 /* warning: addr must be aligned */
3499 static inline void address_space_stw_internal(AddressSpace *as,
3500                                               hwaddr addr, uint32_t val,
3501                                               MemTxAttrs attrs,
3502                                               MemTxResult *result,
3503                                               enum device_endian endian)
3504 {
3505     uint8_t *ptr;
3506     MemoryRegion *mr;
3507     hwaddr l = 2;
3508     hwaddr addr1;
3509     MemTxResult r;
3510     bool release_lock = false;
3511
3512     rcu_read_lock();
3513     mr = address_space_translate(as, addr, &addr1, &l, true);
3514     if (l < 2 || !memory_access_is_direct(mr, true)) {
3515         release_lock |= prepare_mmio_access(mr);
3516
3517 #if defined(TARGET_WORDS_BIGENDIAN)
3518         if (endian == DEVICE_LITTLE_ENDIAN) {
3519             val = bswap16(val);
3520         }
3521 #else
3522         if (endian == DEVICE_BIG_ENDIAN) {
3523             val = bswap16(val);
3524         }
3525 #endif
3526         r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3527     } else {
3528         /* RAM case */
3529         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3530         switch (endian) {
3531         case DEVICE_LITTLE_ENDIAN:
3532             stw_le_p(ptr, val);
3533             break;
3534         case DEVICE_BIG_ENDIAN:
3535             stw_be_p(ptr, val);
3536             break;
3537         default:
3538             stw_p(ptr, val);
3539             break;
3540         }
3541         invalidate_and_set_dirty(mr, addr1, 2);
3542         r = MEMTX_OK;
3543     }
3544     if (result) {
3545         *result = r;
3546     }
3547     if (release_lock) {
3548         qemu_mutex_unlock_iothread();
3549     }
3550     rcu_read_unlock();
3551 }
3552
3553 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3554                        MemTxAttrs attrs, MemTxResult *result)
3555 {
3556     address_space_stw_internal(as, addr, val, attrs, result,
3557                                DEVICE_NATIVE_ENDIAN);
3558 }
3559
3560 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3561                        MemTxAttrs attrs, MemTxResult *result)
3562 {
3563     address_space_stw_internal(as, addr, val, attrs, result,
3564                                DEVICE_LITTLE_ENDIAN);
3565 }
3566
3567 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3568                        MemTxAttrs attrs, MemTxResult *result)
3569 {
3570     address_space_stw_internal(as, addr, val, attrs, result,
3571                                DEVICE_BIG_ENDIAN);
3572 }
3573
3574 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3575 {
3576     address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3577 }
3578
3579 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3580 {
3581     address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3582 }
3583
3584 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3585 {
3586     address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3587 }
3588
3589 /* XXX: optimize */
3590 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3591                        MemTxAttrs attrs, MemTxResult *result)
3592 {
3593     MemTxResult r;
3594     val = tswap64(val);
3595     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3596     if (result) {
3597         *result = r;
3598     }
3599 }
3600
3601 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3602                        MemTxAttrs attrs, MemTxResult *result)
3603 {
3604     MemTxResult r;
3605     val = cpu_to_le64(val);
3606     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3607     if (result) {
3608         *result = r;
3609     }
3610 }
3611 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3612                        MemTxAttrs attrs, MemTxResult *result)
3613 {
3614     MemTxResult r;
3615     val = cpu_to_be64(val);
3616     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3617     if (result) {
3618         *result = r;
3619     }
3620 }
3621
3622 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3623 {
3624     address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3625 }
3626
3627 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3628 {
3629     address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3630 }
3631
3632 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3633 {
3634     address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3635 }
3636
3637 /* virtual memory access for debug (includes writing to ROM) */
3638 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3639                         uint8_t *buf, int len, int is_write)
3640 {
3641     int l;
3642     hwaddr phys_addr;
3643     target_ulong page;
3644
3645     while (len > 0) {
3646         int asidx;
3647         MemTxAttrs attrs;
3648
3649         page = addr & TARGET_PAGE_MASK;
3650         phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3651         asidx = cpu_asidx_from_attrs(cpu, attrs);
3652         /* if no physical page mapped, return an error */
3653         if (phys_addr == -1)
3654             return -1;
3655         l = (page + TARGET_PAGE_SIZE) - addr;
3656         if (l > len)
3657             l = len;
3658         phys_addr += (addr & ~TARGET_PAGE_MASK);
3659         if (is_write) {
3660             cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3661                                           phys_addr, buf, l);
3662         } else {
3663             address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3664                              MEMTXATTRS_UNSPECIFIED,
3665                              buf, l, 0);
3666         }
3667         len -= l;
3668         buf += l;
3669         addr += l;
3670     }
3671     return 0;
3672 }
3673
3674 /*
3675  * Allows code that needs to deal with migration bitmaps etc to still be built
3676  * target independent.
3677  */
3678 size_t qemu_target_page_bits(void)
3679 {
3680     return TARGET_PAGE_BITS;
3681 }
3682
3683 #endif
3684
/*
 * A helper function for the _utterly broken_ virtio device model to find out
 * if it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time, false
 * otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3698
3699 #ifndef CONFIG_USER_ONLY
3700 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3701 {
3702     MemoryRegion*mr;
3703     hwaddr l = 1;
3704     bool res;
3705
3706     rcu_read_lock();
3707     mr = address_space_translate(&address_space_memory,
3708                                  phys_addr, &phys_addr, &l, false);
3709
3710     res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3711     rcu_read_unlock();
3712     return res;
3713 }
3714
3715 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3716 {
3717     RAMBlock *block;
3718     int ret = 0;
3719
3720     rcu_read_lock();
3721     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3722         ret = func(block->idstr, block->host, block->offset,
3723                    block->used_length, opaque);
3724         if (ret) {
3725             break;
3726         }
3727     }
3728     rcu_read_unlock();
3729     return ret;
3730 }
3731 #endif