 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <sys/types.h>
#include "qemu-common.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"

#include "qemu/range.h"

//#define DEBUG_SUBPAGE
#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;
/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)
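/* These flags live in the RAMBlock flags field.  They are OR-ed together when
 * a block is created (see qemu_ram_alloc_internal() and
 * qemu_ram_alloc_from_file() below) and tested individually later, e.g.
 * (block->flags & RAM_SHARED) selects MAP_SHARED when a block is remapped in
 * qemu_ram_remap().
 */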
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];
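/* Worked example (illustrative; assumes TARGET_PAGE_BITS == 12 and
 * P_L2_BITS == 9): P_L2_SIZE is then 512 entries per node and
 * P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6, i.e. the phys_map below is a
 * radix tree of up to six levels, each indexed by a 9-bit slice of the
 * physical page frame number.
 */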
typedef struct PhysPageMap {
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;
struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3
static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    ret = map->nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
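/* A freshly allocated node is filled with identical entries: leaves start out
 * pointing at PHYS_SECTION_UNASSIGNED, interior nodes at PHYS_MAP_NODE_NIL
 * with skip = 1, so any address never touched by phys_page_set() below still
 * resolves to the unassigned section in phys_page_find().
 */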
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
/* Compact a non leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }
        phys_page_compact(&p[i], nodes, compacted);
    }

    /* We can only compress if there's only one child. */
    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    hwaddr index = addr >> TARGET_PAGE_BITS;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    }
    return &sections[PHYS_SECTION_UNASSIGNED];
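/* The walk above visits at most P_L2_LEVELS entries: each step consumes
 * lp.skip levels (more than one when phys_page_compact() has folded a chain
 * of single-child nodes), and the final range check guards against a leaf
 * whose section does not actually cover addr, which then falls back to
 * PHYS_SECTION_UNASSIGNED.
 */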
bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
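/* (The ROM-device branch is elided in this excerpt; the intent, as relied on
 * by the callers below, is that an access is "direct" when it can be serviced
 * from host memory: RAM unless it is a read-only region being written, and
 * ROM devices only for reads.)
 */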
/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    MemoryRegionSection *section;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }
/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(cpu->memory_dispatch,
                                               addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
#if !defined(CONFIG_USER_ONLY)
static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}
static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription *[]) {
        &vmstate_cpu_common_exception_index,
    }
};

CPUState *qemu_get_cpu(int index)
{
        if (cpu->cpu_index == index) {
#if !defined(CONFIG_USER_ONLY)
void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
{
    /* We only support one address space per cpu at the moment. */
    assert(cpu->as == as);

    if (cpu->tcg_as_listener) {
        memory_listener_unregister(cpu->tcg_as_listener);
    } else {
        cpu->tcg_as_listener = g_new0(MemoryListener, 1);
    }
    cpu->tcg_as_listener->commit = tcg_commit;
    memory_listener_register(cpu->tcg_as_listener, as);
void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);

#if defined(CONFIG_USER_ONLY)
    CPU_FOREACH(some_cpu) {
    cpu->cpu_index = cpu_index;
    QTAILQ_INIT(&cpu->breakpoints);
    QTAILQ_INIT(&cpu->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
    cpu_reload_memory_map(cpu);
#endif
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
        tb_invalidate_phys_addr(cpu->as,
                                phys | (pc & ~TARGET_PAGE_MASK));
#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)

#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
    }

    wp = g_malloc(sizeof(*wp));

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);
/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
            && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}
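/* Illustrative example (assuming a 64-bit vaddr): a watchpoint at
 * vaddr = 0xfffffffffffff000 with len = 0x1000 would make vaddr + len wrap to
 * zero, but wpend = vaddr + len - 1 = 0xffffffffffffffff stays representable,
 * so the overlap test above remains correct for ranges that end at the very
 * top of the address space.
 */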
/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    bp = g_malloc(sizeof(*bp));

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            CPUArchState *env = cpu->env_ptr;
void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    }
#if defined(CONFIG_USER_ONLY)
    struct sigaction act;
    sigfillset(&act.sa_mask);
    act.sa_handler = SIG_DFL;
    sigaction(SIGABRT, &act, NULL);
#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        goto found;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     *     xxx removed from list
     *     call_rcu(reclaim_ramblock, xxx);
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    cpu_tlb_reset_dirty_all(start1, length);
/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    unsigned long end, page;

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;
    dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
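/* The bool result (whether any dirty bits were actually cleared) lets callers
 * skip further work when the range was already clean; only TCG needs the TLB
 * reset above, hence the tcg_enabled() check.
 */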
/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    if (memory_region_is_ram(section->mr)) {
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        iotlb = section - section->address_space->dispatch->map.sections;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
            }
        }
    }
#endif /* defined(CONFIG_USER_ONLY) */
934 #if !defined(CONFIG_USER_ONLY)
936 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
938 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
);
940 static void *(*phys_mem_alloc
)(size_t size
, uint64_t *align
) =
944 * Set a custom physical guest memory alloator.
945 * Accelerators with unusual needs may need this. Hopefully, we can
946 * get rid of it eventually.
948 void phys_mem_set_alloc(void *(*alloc
)(size_t, uint64_t *align
))
950 phys_mem_alloc
= alloc
;
953 static uint16_t phys_section_add(PhysPageMap
*map
,
954 MemoryRegionSection
*section
)
956 /* The physical section number is ORed with a page-aligned
957 * pointer to produce the iotlb entries. Thus it should
958 * never overflow into the page-aligned value.
960 assert(map
->sections_nb
< TARGET_PAGE_SIZE
);
962 if (map
->sections_nb
== map
->sections_nb_alloc
) {
963 map
->sections_nb_alloc
= MAX(map
->sections_nb_alloc
* 2, 16);
964 map
->sections
= g_renew(MemoryRegionSection
, map
->sections
,
965 map
->sections_nb_alloc
);
967 map
->sections
[map
->sections_nb
] = *section
;
968 memory_region_ref(section
->mr
);
969 return map
->sections_nb
++;
972 static void phys_section_destroy(MemoryRegion
*mr
)
974 memory_region_unref(mr
);
977 subpage_t
*subpage
= container_of(mr
, subpage_t
, iomem
);
978 object_unref(OBJECT(&subpage
->iomem
));
983 static void phys_sections_free(PhysPageMap
*map
)
985 while (map
->sections_nb
> 0) {
986 MemoryRegionSection
*section
= &map
->sections
[--map
->sections_nb
];
987 phys_section_destroy(section
->mr
);
989 g_free(map
->sections
);
993 static void register_subpage(AddressSpaceDispatch
*d
, MemoryRegionSection
*section
)
996 hwaddr base
= section
->offset_within_address_space
998 MemoryRegionSection
*existing
= phys_page_find(d
->phys_map
, base
,
999 d
->map
.nodes
, d
->map
.sections
);
1000 MemoryRegionSection subsection
= {
1001 .offset_within_address_space
= base
,
1002 .size
= int128_make64(TARGET_PAGE_SIZE
),
1006 assert(existing
->mr
->subpage
|| existing
->mr
== &io_mem_unassigned
);
1008 if (!(existing
->mr
->subpage
)) {
1009 subpage
= subpage_init(d
->as
, base
);
1010 subsection
.address_space
= d
->as
;
1011 subsection
.mr
= &subpage
->iomem
;
1012 phys_page_set(d
, base
>> TARGET_PAGE_BITS
, 1,
1013 phys_section_add(&d
->map
, &subsection
));
1015 subpage
= container_of(existing
->mr
, subpage_t
, iomem
);
1017 start
= section
->offset_within_address_space
& ~TARGET_PAGE_MASK
;
1018 end
= start
+ int128_get64(section
->size
) - 1;
1019 subpage_register(subpage
, start
, end
,
1020 phys_section_add(&d
->map
, section
));
1024 static void register_multipage(AddressSpaceDispatch
*d
,
1025 MemoryRegionSection
*section
)
1027 hwaddr start_addr
= section
->offset_within_address_space
;
1028 uint16_t section_index
= phys_section_add(&d
->map
, section
);
1029 uint64_t num_pages
= int128_get64(int128_rshift(section
->size
,
1033 phys_page_set(d
, start_addr
>> TARGET_PAGE_BITS
, num_pages
, section_index
);
1036 static void mem_add(MemoryListener
*listener
, MemoryRegionSection
*section
)
1038 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
1039 AddressSpaceDispatch
*d
= as
->next_dispatch
;
1040 MemoryRegionSection now
= *section
, remain
= *section
;
1041 Int128 page_size
= int128_make64(TARGET_PAGE_SIZE
);
1043 if (now
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1044 uint64_t left
= TARGET_PAGE_ALIGN(now
.offset_within_address_space
)
1045 - now
.offset_within_address_space
;
1047 now
.size
= int128_min(int128_make64(left
), now
.size
);
1048 register_subpage(d
, &now
);
1050 now
.size
= int128_zero();
1052 while (int128_ne(remain
.size
, now
.size
)) {
1053 remain
.size
= int128_sub(remain
.size
, now
.size
);
1054 remain
.offset_within_address_space
+= int128_get64(now
.size
);
1055 remain
.offset_within_region
+= int128_get64(now
.size
);
1057 if (int128_lt(remain
.size
, page_size
)) {
1058 register_subpage(d
, &now
);
1059 } else if (remain
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1060 now
.size
= page_size
;
1061 register_subpage(d
, &now
);
1063 now
.size
= int128_and(now
.size
, int128_neg(page_size
));
1064 register_multipage(d
, &now
);
1069 void qemu_flush_coalesced_mmio_buffer(void)
1072 kvm_flush_coalesced_mmio_buffer();
1075 void qemu_mutex_lock_ramlist(void)
1077 qemu_mutex_lock(&ram_list
.mutex
);
1080 void qemu_mutex_unlock_ramlist(void)
1082 qemu_mutex_unlock(&ram_list
.mutex
);
1087 #include <sys/vfs.h>
1089 #define HUGETLBFS_MAGIC 0x958458f6
1091 static long gethugepagesize(const char *path
, Error
**errp
)
1097 ret
= statfs(path
, &fs
);
1098 } while (ret
!= 0 && errno
== EINTR
);
1101 error_setg_errno(errp
, errno
, "failed to get page size of file %s",
1106 if (fs
.f_type
!= HUGETLBFS_MAGIC
)
1107 fprintf(stderr
, "Warning: path not on HugeTLBFS: %s\n", path
);
1112 static void *file_ram_alloc(RAMBlock
*block
,
1118 char *sanitized_name
;
1123 Error
*local_err
= NULL
;
1125 hpagesize
= gethugepagesize(path
, &local_err
);
1127 error_propagate(errp
, local_err
);
1130 block
->mr
->align
= hpagesize
;
1132 if (memory
< hpagesize
) {
1133 error_setg(errp
, "memory size 0x" RAM_ADDR_FMT
" must be equal to "
1134 "or larger than huge page size 0x%" PRIx64
,
1139 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1141 "host lacks kvm mmu notifiers, -mem-path unsupported");
1145 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1146 sanitized_name
= g_strdup(memory_region_name(block
->mr
));
1147 for (c
= sanitized_name
; *c
!= '\0'; c
++) {
1152 filename
= g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path
,
1154 g_free(sanitized_name
);
1156 fd
= mkstemp(filename
);
1158 error_setg_errno(errp
, errno
,
1159 "unable to create backing store for hugepages");
1166 memory
= (memory
+hpagesize
-1) & ~(hpagesize
-1);
1169 * ftruncate is not supported by hugetlbfs in older
1170 * hosts, so don't bother bailing out on errors.
1171 * If anything goes wrong with it under other filesystems,
1174 if (ftruncate(fd
, memory
)) {
1175 perror("ftruncate");
1178 area
= mmap(0, memory
, PROT_READ
| PROT_WRITE
,
1179 (block
->flags
& RAM_SHARED
? MAP_SHARED
: MAP_PRIVATE
),
1181 if (area
== MAP_FAILED
) {
1182 error_setg_errno(errp
, errno
,
1183 "unable to map backing store for hugepages");
1189 os_mem_prealloc(fd
, area
, memory
);
1197 error_report("%s", error_get_pretty(*errp
));
1204 /* Called with the ramlist lock held. */
1205 static ram_addr_t
find_ram_offset(ram_addr_t size
)
1207 RAMBlock
*block
, *next_block
;
1208 ram_addr_t offset
= RAM_ADDR_MAX
, mingap
= RAM_ADDR_MAX
;
1210 assert(size
!= 0); /* it would hand out same offset multiple times */
1212 if (QLIST_EMPTY_RCU(&ram_list
.blocks
)) {
1216 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1217 ram_addr_t end
, next
= RAM_ADDR_MAX
;
1219 end
= block
->offset
+ block
->max_length
;
1221 QLIST_FOREACH_RCU(next_block
, &ram_list
.blocks
, next
) {
1222 if (next_block
->offset
>= end
) {
1223 next
= MIN(next
, next_block
->offset
);
1226 if (next
- end
>= size
&& next
- end
< mingap
) {
1228 mingap
= next
- end
;
1232 if (offset
== RAM_ADDR_MAX
) {
1233 fprintf(stderr
, "Failed to find gap of requested size: %" PRIu64
"\n",
1241 ram_addr_t
last_ram_offset(void)
1244 ram_addr_t last
= 0;
1247 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1248 last
= MAX(last
, block
->offset
+ block
->max_length
);
1254 static void qemu_ram_setup_dump(void *addr
, ram_addr_t size
)
1258 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1259 if (!machine_dump_guest_core(current_machine
)) {
1260 ret
= qemu_madvise(addr
, size
, QEMU_MADV_DONTDUMP
);
1262 perror("qemu_madvise");
1263 fprintf(stderr
, "madvise doesn't support MADV_DONTDUMP, "
1264 "but dump_guest_core=off specified\n");
1269 /* Called within an RCU critical section, or while the ramlist lock
1272 static RAMBlock
*find_ram_block(ram_addr_t addr
)
1276 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1277 if (block
->offset
== addr
) {
1285 /* Called with iothread lock held. */
1286 void qemu_ram_set_idstr(ram_addr_t addr
, const char *name
, DeviceState
*dev
)
1288 RAMBlock
*new_block
, *block
;
1291 new_block
= find_ram_block(addr
);
1293 assert(!new_block
->idstr
[0]);
1296 char *id
= qdev_get_dev_path(dev
);
1298 snprintf(new_block
->idstr
, sizeof(new_block
->idstr
), "%s/", id
);
1302 pstrcat(new_block
->idstr
, sizeof(new_block
->idstr
), name
);
1304 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1305 if (block
!= new_block
&& !strcmp(block
->idstr
, new_block
->idstr
)) {
1306 fprintf(stderr
, "RAMBlock \"%s\" already registered, abort!\n",
1314 /* Called with iothread lock held. */
1315 void qemu_ram_unset_idstr(ram_addr_t addr
)
1319 /* FIXME: arch_init.c assumes that this is not called throughout
1320 * migration. Ignore the problem since hot-unplug during migration
1321 * does not work anyway.
1325 block
= find_ram_block(addr
);
1327 memset(block
->idstr
, 0, sizeof(block
->idstr
));
1332 static int memory_try_enable_merging(void *addr
, size_t len
)
1334 if (!machine_mem_merge(current_machine
)) {
1335 /* disabled by the user */
1339 return qemu_madvise(addr
, len
, QEMU_MADV_MERGEABLE
);
1342 /* Only legal before guest might have detected the memory size: e.g. on
1343 * incoming migration, or right after reset.
1345 * As memory core doesn't know how is memory accessed, it is up to
1346 * resize callback to update device state and/or add assertions to detect
1347 * misuse, if necessary.
1349 int qemu_ram_resize(ram_addr_t base
, ram_addr_t newsize
, Error
**errp
)
1351 RAMBlock
*block
= find_ram_block(base
);
1355 newsize
= TARGET_PAGE_ALIGN(newsize
);
1357 if (block
->used_length
== newsize
) {
1361 if (!(block
->flags
& RAM_RESIZEABLE
)) {
1362 error_setg_errno(errp
, EINVAL
,
1363 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1364 " in != 0x" RAM_ADDR_FMT
, block
->idstr
,
1365 newsize
, block
->used_length
);
1369 if (block
->max_length
< newsize
) {
1370 error_setg_errno(errp
, EINVAL
,
1371 "Length too large: %s: 0x" RAM_ADDR_FMT
1372 " > 0x" RAM_ADDR_FMT
, block
->idstr
,
1373 newsize
, block
->max_length
);
1377 cpu_physical_memory_clear_dirty_range(block
->offset
, block
->used_length
);
1378 block
->used_length
= newsize
;
1379 cpu_physical_memory_set_dirty_range(block
->offset
, block
->used_length
,
1381 memory_region_set_size(block
->mr
, newsize
);
1382 if (block
->resized
) {
1383 block
->resized(block
->idstr
, newsize
, block
->host
);
1388 static ram_addr_t
ram_block_add(RAMBlock
*new_block
, Error
**errp
)
1391 RAMBlock
*last_block
= NULL
;
1392 ram_addr_t old_ram_size
, new_ram_size
;
1394 old_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1396 qemu_mutex_lock_ramlist();
1397 new_block
->offset
= find_ram_offset(new_block
->max_length
);
1399 if (!new_block
->host
) {
1400 if (xen_enabled()) {
1401 xen_ram_alloc(new_block
->offset
, new_block
->max_length
,
1404 new_block
->host
= phys_mem_alloc(new_block
->max_length
,
1405 &new_block
->mr
->align
);
1406 if (!new_block
->host
) {
1407 error_setg_errno(errp
, errno
,
1408 "cannot set up guest memory '%s'",
1409 memory_region_name(new_block
->mr
));
1410 qemu_mutex_unlock_ramlist();
1413 memory_try_enable_merging(new_block
->host
, new_block
->max_length
);
1417 new_ram_size
= MAX(old_ram_size
,
1418 (new_block
->offset
+ new_block
->max_length
) >> TARGET_PAGE_BITS
);
1419 if (new_ram_size
> old_ram_size
) {
1420 migration_bitmap_extend(old_ram_size
, new_ram_size
);
1422 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1423 * QLIST (which has an RCU-friendly variant) does not have insertion at
1424 * tail, so save the last element in last_block.
1426 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1428 if (block
->max_length
< new_block
->max_length
) {
1433 QLIST_INSERT_BEFORE_RCU(block
, new_block
, next
);
1434 } else if (last_block
) {
1435 QLIST_INSERT_AFTER_RCU(last_block
, new_block
, next
);
1436 } else { /* list is empty */
1437 QLIST_INSERT_HEAD_RCU(&ram_list
.blocks
, new_block
, next
);
1439 ram_list
.mru_block
= NULL
;
1441 /* Write list before version */
1444 qemu_mutex_unlock_ramlist();
1446 new_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1448 if (new_ram_size
> old_ram_size
) {
1451 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1452 for (i
= 0; i
< DIRTY_MEMORY_NUM
; i
++) {
1453 ram_list
.dirty_memory
[i
] =
1454 bitmap_zero_extend(ram_list
.dirty_memory
[i
],
1455 old_ram_size
, new_ram_size
);
1458 cpu_physical_memory_set_dirty_range(new_block
->offset
,
1459 new_block
->used_length
,
1462 if (new_block
->host
) {
1463 qemu_ram_setup_dump(new_block
->host
, new_block
->max_length
);
1464 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_HUGEPAGE
);
1465 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_DONTFORK
);
1466 if (kvm_enabled()) {
1467 kvm_setup_guest_memory(new_block
->host
, new_block
->max_length
);
1471 return new_block
->offset
;
1475 ram_addr_t
qemu_ram_alloc_from_file(ram_addr_t size
, MemoryRegion
*mr
,
1476 bool share
, const char *mem_path
,
1479 RAMBlock
*new_block
;
1481 Error
*local_err
= NULL
;
1483 if (xen_enabled()) {
1484 error_setg(errp
, "-mem-path not supported with Xen");
1488 if (phys_mem_alloc
!= qemu_anon_ram_alloc
) {
1490 * file_ram_alloc() needs to allocate just like
1491 * phys_mem_alloc, but we haven't bothered to provide
1495 "-mem-path not supported with this accelerator");
1499 size
= TARGET_PAGE_ALIGN(size
);
1500 new_block
= g_malloc0(sizeof(*new_block
));
1502 new_block
->used_length
= size
;
1503 new_block
->max_length
= size
;
1504 new_block
->flags
= share
? RAM_SHARED
: 0;
1505 new_block
->host
= file_ram_alloc(new_block
, size
,
1507 if (!new_block
->host
) {
1512 addr
= ram_block_add(new_block
, &local_err
);
1515 error_propagate(errp
, local_err
);
1523 ram_addr_t
qemu_ram_alloc_internal(ram_addr_t size
, ram_addr_t max_size
,
1524 void (*resized
)(const char*,
1527 void *host
, bool resizeable
,
1528 MemoryRegion
*mr
, Error
**errp
)
1530 RAMBlock
*new_block
;
1532 Error
*local_err
= NULL
;
1534 size
= TARGET_PAGE_ALIGN(size
);
1535 max_size
= TARGET_PAGE_ALIGN(max_size
);
1536 new_block
= g_malloc0(sizeof(*new_block
));
1538 new_block
->resized
= resized
;
1539 new_block
->used_length
= size
;
1540 new_block
->max_length
= max_size
;
1541 assert(max_size
>= size
);
1543 new_block
->host
= host
;
1545 new_block
->flags
|= RAM_PREALLOC
;
1548 new_block
->flags
|= RAM_RESIZEABLE
;
1550 addr
= ram_block_add(new_block
, &local_err
);
1553 error_propagate(errp
, local_err
);
1559 ram_addr_t
qemu_ram_alloc_from_ptr(ram_addr_t size
, void *host
,
1560 MemoryRegion
*mr
, Error
**errp
)
1562 return qemu_ram_alloc_internal(size
, size
, NULL
, host
, false, mr
, errp
);
1565 ram_addr_t
qemu_ram_alloc(ram_addr_t size
, MemoryRegion
*mr
, Error
**errp
)
1567 return qemu_ram_alloc_internal(size
, size
, NULL
, NULL
, false, mr
, errp
);
1570 ram_addr_t
qemu_ram_alloc_resizeable(ram_addr_t size
, ram_addr_t maxsz
,
1571 void (*resized
)(const char*,
1574 MemoryRegion
*mr
, Error
**errp
)
1576 return qemu_ram_alloc_internal(size
, maxsz
, resized
, NULL
, true, mr
, errp
);
1579 void qemu_ram_free_from_ptr(ram_addr_t addr
)
1583 qemu_mutex_lock_ramlist();
1584 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1585 if (addr
== block
->offset
) {
1586 QLIST_REMOVE_RCU(block
, next
);
1587 ram_list
.mru_block
= NULL
;
1588 /* Write list before version */
1591 g_free_rcu(block
, rcu
);
1595 qemu_mutex_unlock_ramlist();
1598 static void reclaim_ramblock(RAMBlock
*block
)
1600 if (block
->flags
& RAM_PREALLOC
) {
1602 } else if (xen_enabled()) {
1603 xen_invalidate_map_cache_entry(block
->host
);
1605 } else if (block
->fd
>= 0) {
1606 munmap(block
->host
, block
->max_length
);
1610 qemu_anon_ram_free(block
->host
, block
->max_length
);
1615 void qemu_ram_free(ram_addr_t addr
)
1619 qemu_mutex_lock_ramlist();
1620 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1621 if (addr
== block
->offset
) {
1622 QLIST_REMOVE_RCU(block
, next
);
1623 ram_list
.mru_block
= NULL
;
1624 /* Write list before version */
1627 call_rcu(block
, reclaim_ramblock
, rcu
);
1631 qemu_mutex_unlock_ramlist();
1635 void qemu_ram_remap(ram_addr_t addr
, ram_addr_t length
)
1642 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1643 offset
= addr
- block
->offset
;
1644 if (offset
< block
->max_length
) {
1645 vaddr
= ramblock_ptr(block
, offset
);
1646 if (block
->flags
& RAM_PREALLOC
) {
1648 } else if (xen_enabled()) {
1652 if (block
->fd
>= 0) {
1653 flags
|= (block
->flags
& RAM_SHARED
?
1654 MAP_SHARED
: MAP_PRIVATE
);
1655 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1656 flags
, block
->fd
, offset
);
1659 * Remap needs to match alloc. Accelerators that
1660 * set phys_mem_alloc never remap. If they did,
1661 * we'd need a remap hook here.
1663 assert(phys_mem_alloc
== qemu_anon_ram_alloc
);
1665 flags
|= MAP_PRIVATE
| MAP_ANONYMOUS
;
1666 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1669 if (area
!= vaddr
) {
1670 fprintf(stderr
, "Could not remap addr: "
1671 RAM_ADDR_FMT
"@" RAM_ADDR_FMT
"\n",
1675 memory_try_enable_merging(vaddr
, length
);
1676 qemu_ram_setup_dump(vaddr
, length
);
1681 #endif /* !_WIN32 */
1683 int qemu_get_ram_fd(ram_addr_t addr
)
1689 block
= qemu_get_ram_block(addr
);
1695 void *qemu_get_ram_block_host_ptr(ram_addr_t addr
)
1701 block
= qemu_get_ram_block(addr
);
1702 ptr
= ramblock_ptr(block
, 0);
1707 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1708 * This should not be used for general purpose DMA. Use address_space_map
1709 * or address_space_rw instead. For local memory (e.g. video ram) that the
1710 * device owns, use memory_region_get_ram_ptr.
1712 * By the time this function returns, the returned pointer is not protected
1713 * by RCU anymore. If the caller is not within an RCU critical section and
1714 * does not hold the iothread lock, it must have other means of protecting the
1715 * pointer, such as a reference to the region that includes the incoming
1718 void *qemu_get_ram_ptr(ram_addr_t addr
)
1724 block
= qemu_get_ram_block(addr
);
1726 if (xen_enabled() && block
->host
== NULL
) {
1727 /* We need to check if the requested address is in the RAM
1728 * because we don't want to map the entire memory in QEMU.
1729 * In that case just map until the end of the page.
1731 if (block
->offset
== 0) {
1732 ptr
= xen_map_cache(addr
, 0, 0);
1736 block
->host
= xen_map_cache(block
->offset
, block
->max_length
, 1);
1738 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1745 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1746 * but takes a size argument.
1748 * By the time this function returns, the returned pointer is not protected
1749 * by RCU anymore. If the caller is not within an RCU critical section and
1750 * does not hold the iothread lock, it must have other means of protecting the
1751 * pointer, such as a reference to the region that includes the incoming
1754 static void *qemu_ram_ptr_length(ram_addr_t addr
, hwaddr
*size
)
1760 if (xen_enabled()) {
1761 return xen_map_cache(addr
, *size
, 1);
1765 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1766 if (addr
- block
->offset
< block
->max_length
) {
1767 if (addr
- block
->offset
+ *size
> block
->max_length
)
1768 *size
= block
->max_length
- addr
+ block
->offset
;
1769 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1775 fprintf(stderr
, "Bad ram offset %" PRIx64
"\n", (uint64_t)addr
);
1780 /* Some of the softmmu routines need to translate from a host pointer
1781 * (typically a TLB entry) back to a ram offset.
1783 * By the time this function returns, the returned pointer is not protected
1784 * by RCU anymore. If the caller is not within an RCU critical section and
1785 * does not hold the iothread lock, it must have other means of protecting the
1786 * pointer, such as a reference to the region that includes the incoming
1789 MemoryRegion
*qemu_ram_addr_from_host(void *ptr
, ram_addr_t
*ram_addr
)
1792 uint8_t *host
= ptr
;
1795 if (xen_enabled()) {
1797 *ram_addr
= xen_ram_addr_from_mapcache(ptr
);
1798 mr
= qemu_get_ram_block(*ram_addr
)->mr
;
1804 block
= atomic_rcu_read(&ram_list
.mru_block
);
1805 if (block
&& block
->host
&& host
- block
->host
< block
->max_length
) {
1809 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1810 /* This case append when the block is not mapped. */
1811 if (block
->host
== NULL
) {
1814 if (host
- block
->host
< block
->max_length
) {
1823 *ram_addr
= block
->offset
+ (host
- block
->host
);
1829 static void notdirty_mem_write(void *opaque
, hwaddr ram_addr
,
1830 uint64_t val
, unsigned size
)
1832 if (!cpu_physical_memory_get_dirty_flag(ram_addr
, DIRTY_MEMORY_CODE
)) {
1833 tb_invalidate_phys_page_fast(ram_addr
, size
);
1837 stb_p(qemu_get_ram_ptr(ram_addr
), val
);
1840 stw_p(qemu_get_ram_ptr(ram_addr
), val
);
1843 stl_p(qemu_get_ram_ptr(ram_addr
), val
);
1848 /* Set both VGA and migration bits for simplicity and to remove
1849 * the notdirty callback faster.
1851 cpu_physical_memory_set_dirty_range(ram_addr
, size
,
1852 DIRTY_CLIENTS_NOCODE
);
1853 /* we remove the notdirty callback only if the code has been
1855 if (!cpu_physical_memory_is_clean(ram_addr
)) {
1856 CPUArchState
*env
= current_cpu
->env_ptr
;
1857 tlb_set_dirty(env
, current_cpu
->mem_io_vaddr
);
1861 static bool notdirty_mem_accepts(void *opaque
, hwaddr addr
,
1862 unsigned size
, bool is_write
)
1867 static const MemoryRegionOps notdirty_mem_ops
= {
1868 .write
= notdirty_mem_write
,
1869 .valid
.accepts
= notdirty_mem_accepts
,
1870 .endianness
= DEVICE_NATIVE_ENDIAN
,
1873 /* Generate a debug exception if a watchpoint has been hit. */
1874 static void check_watchpoint(int offset
, int len
, MemTxAttrs attrs
, int flags
)
1876 CPUState
*cpu
= current_cpu
;
1877 CPUArchState
*env
= cpu
->env_ptr
;
1878 target_ulong pc
, cs_base
;
1883 if (cpu
->watchpoint_hit
) {
1884 /* We re-entered the check after replacing the TB. Now raise
1885 * the debug interrupt so that is will trigger after the
1886 * current instruction. */
1887 cpu_interrupt(cpu
, CPU_INTERRUPT_DEBUG
);
1890 vaddr
= (cpu
->mem_io_vaddr
& TARGET_PAGE_MASK
) + offset
;
1891 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
1892 if (cpu_watchpoint_address_matches(wp
, vaddr
, len
)
1893 && (wp
->flags
& flags
)) {
1894 if (flags
== BP_MEM_READ
) {
1895 wp
->flags
|= BP_WATCHPOINT_HIT_READ
;
1897 wp
->flags
|= BP_WATCHPOINT_HIT_WRITE
;
1899 wp
->hitaddr
= vaddr
;
1900 wp
->hitattrs
= attrs
;
1901 if (!cpu
->watchpoint_hit
) {
1902 cpu
->watchpoint_hit
= wp
;
1903 tb_check_watchpoint(cpu
);
1904 if (wp
->flags
& BP_STOP_BEFORE_ACCESS
) {
1905 cpu
->exception_index
= EXCP_DEBUG
;
1908 cpu_get_tb_cpu_state(env
, &pc
, &cs_base
, &cpu_flags
);
1909 tb_gen_code(cpu
, pc
, cs_base
, cpu_flags
, 1);
1910 cpu_resume_from_signal(cpu
, NULL
);
1914 wp
->flags
&= ~BP_WATCHPOINT_HIT
;
1919 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1920 so these check for a hit then pass through to the normal out-of-line
1922 static MemTxResult
watch_mem_read(void *opaque
, hwaddr addr
, uint64_t *pdata
,
1923 unsigned size
, MemTxAttrs attrs
)
1928 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, attrs
, BP_MEM_READ
);
1931 data
= address_space_ldub(&address_space_memory
, addr
, attrs
, &res
);
1934 data
= address_space_lduw(&address_space_memory
, addr
, attrs
, &res
);
1937 data
= address_space_ldl(&address_space_memory
, addr
, attrs
, &res
);
1945 static MemTxResult
watch_mem_write(void *opaque
, hwaddr addr
,
1946 uint64_t val
, unsigned size
,
1951 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, attrs
, BP_MEM_WRITE
);
1954 address_space_stb(&address_space_memory
, addr
, val
, attrs
, &res
);
1957 address_space_stw(&address_space_memory
, addr
, val
, attrs
, &res
);
1960 address_space_stl(&address_space_memory
, addr
, val
, attrs
, &res
);
1967 static const MemoryRegionOps watch_mem_ops
= {
1968 .read_with_attrs
= watch_mem_read
,
1969 .write_with_attrs
= watch_mem_write
,
1970 .endianness
= DEVICE_NATIVE_ENDIAN
,
1973 static MemTxResult
subpage_read(void *opaque
, hwaddr addr
, uint64_t *data
,
1974 unsigned len
, MemTxAttrs attrs
)
1976 subpage_t
*subpage
= opaque
;
1980 #if defined(DEBUG_SUBPAGE)
1981 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
"\n", __func__
,
1982 subpage
, len
, addr
);
1984 res
= address_space_read(subpage
->as
, addr
+ subpage
->base
,
1991 *data
= ldub_p(buf
);
1994 *data
= lduw_p(buf
);
2007 static MemTxResult
subpage_write(void *opaque
, hwaddr addr
,
2008 uint64_t value
, unsigned len
, MemTxAttrs attrs
)
2010 subpage_t
*subpage
= opaque
;
2013 #if defined(DEBUG_SUBPAGE)
2014 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2015 " value %"PRIx64
"\n",
2016 __func__
, subpage
, len
, addr
, value
);
2034 return address_space_write(subpage
->as
, addr
+ subpage
->base
,
2038 static bool subpage_accepts(void *opaque
, hwaddr addr
,
2039 unsigned len
, bool is_write
)
2041 subpage_t
*subpage
= opaque
;
2042 #if defined(DEBUG_SUBPAGE)
2043 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx
"\n",
2044 __func__
, subpage
, is_write
? 'w' : 'r', len
, addr
);
2047 return address_space_access_valid(subpage
->as
, addr
+ subpage
->base
,
2051 static const MemoryRegionOps subpage_ops
= {
2052 .read_with_attrs
= subpage_read
,
2053 .write_with_attrs
= subpage_write
,
2054 .impl
.min_access_size
= 1,
2055 .impl
.max_access_size
= 8,
2056 .valid
.min_access_size
= 1,
2057 .valid
.max_access_size
= 8,
2058 .valid
.accepts
= subpage_accepts
,
2059 .endianness
= DEVICE_NATIVE_ENDIAN
,
2062 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
2067 if (start
>= TARGET_PAGE_SIZE
|| end
>= TARGET_PAGE_SIZE
)
2069 idx
= SUBPAGE_IDX(start
);
2070 eidx
= SUBPAGE_IDX(end
);
2071 #if defined(DEBUG_SUBPAGE)
2072 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2073 __func__
, mmio
, start
, end
, idx
, eidx
, section
);
2075 for (; idx
<= eidx
; idx
++) {
2076 mmio
->sub_section
[idx
] = section
;
2082 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
)
2086 mmio
= g_malloc0(sizeof(subpage_t
));
2090 memory_region_init_io(&mmio
->iomem
, NULL
, &subpage_ops
, mmio
,
2091 NULL
, TARGET_PAGE_SIZE
);
2092 mmio
->iomem
.subpage
= true;
2093 #if defined(DEBUG_SUBPAGE)
2094 printf("%s: %p base " TARGET_FMT_plx
" len %08x\n", __func__
,
2095 mmio
, base
, TARGET_PAGE_SIZE
);
2097 subpage_register(mmio
, 0, TARGET_PAGE_SIZE
-1, PHYS_SECTION_UNASSIGNED
);
2102 static uint16_t dummy_section(PhysPageMap
*map
, AddressSpace
*as
,
2106 MemoryRegionSection section
= {
2107 .address_space
= as
,
2109 .offset_within_address_space
= 0,
2110 .offset_within_region
= 0,
2111 .size
= int128_2_64(),
2114 return phys_section_add(map
, §ion
);
2117 MemoryRegion
*iotlb_to_region(CPUState
*cpu
, hwaddr index
)
2119 AddressSpaceDispatch
*d
= atomic_rcu_read(&cpu
->memory_dispatch
);
2120 MemoryRegionSection
*sections
= d
->map
.sections
;
2122 return sections
[index
& ~TARGET_PAGE_MASK
].mr
;
2125 static void io_mem_init(void)
2127 memory_region_init_io(&io_mem_rom
, NULL
, &unassigned_mem_ops
, NULL
, NULL
, UINT64_MAX
);
2128 memory_region_init_io(&io_mem_unassigned
, NULL
, &unassigned_mem_ops
, NULL
,
2130 memory_region_init_io(&io_mem_notdirty
, NULL
, ¬dirty_mem_ops
, NULL
,
2132 memory_region_init_io(&io_mem_watch
, NULL
, &watch_mem_ops
, NULL
,
2136 static void mem_begin(MemoryListener
*listener
)
2138 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2139 AddressSpaceDispatch
*d
= g_new0(AddressSpaceDispatch
, 1);
2142 n
= dummy_section(&d
->map
, as
, &io_mem_unassigned
);
2143 assert(n
== PHYS_SECTION_UNASSIGNED
);
2144 n
= dummy_section(&d
->map
, as
, &io_mem_notdirty
);
2145 assert(n
== PHYS_SECTION_NOTDIRTY
);
2146 n
= dummy_section(&d
->map
, as
, &io_mem_rom
);
2147 assert(n
== PHYS_SECTION_ROM
);
2148 n
= dummy_section(&d
->map
, as
, &io_mem_watch
);
2149 assert(n
== PHYS_SECTION_WATCH
);
2151 d
->phys_map
= (PhysPageEntry
) { .ptr
= PHYS_MAP_NODE_NIL
, .skip
= 1 };
2153 as
->next_dispatch
= d
;
2156 static void address_space_dispatch_free(AddressSpaceDispatch
*d
)
2158 phys_sections_free(&d
->map
);
2162 static void mem_commit(MemoryListener
*listener
)
2164 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2165 AddressSpaceDispatch
*cur
= as
->dispatch
;
2166 AddressSpaceDispatch
*next
= as
->next_dispatch
;
2168 phys_page_compact_all(next
, next
->map
.nodes_nb
);
2170 atomic_rcu_set(&as
->dispatch
, next
);
2172 call_rcu(cur
, address_space_dispatch_free
, rcu
);
2176 static void tcg_commit(MemoryListener
*listener
)
2180 /* since each CPU stores ram addresses in its TLB cache, we must
2181 reset the modified entries */
2184 /* FIXME: Disentangle the cpu.h circular files deps so we can
2185 directly get the right CPU from listener. */
2186 if (cpu
->tcg_as_listener
!= listener
) {
2189 cpu_reload_memory_map(cpu
);
2193 void address_space_init_dispatch(AddressSpace
*as
)
2195 as
->dispatch
= NULL
;
2196 as
->dispatch_listener
= (MemoryListener
) {
2198 .commit
= mem_commit
,
2199 .region_add
= mem_add
,
2200 .region_nop
= mem_add
,
2203 memory_listener_register(&as
->dispatch_listener
, as
);
2206 void address_space_unregister(AddressSpace
*as
)
2208 memory_listener_unregister(&as
->dispatch_listener
);
2211 void address_space_destroy_dispatch(AddressSpace
*as
)
2213 AddressSpaceDispatch
*d
= as
->dispatch
;
2215 atomic_rcu_set(&as
->dispatch
, NULL
);
2217 call_rcu(d
, address_space_dispatch_free
, rcu
);
2221 static void memory_map_init(void)
2223 system_memory
= g_malloc(sizeof(*system_memory
));
2225 memory_region_init(system_memory
, NULL
, "system", UINT64_MAX
);
2226 address_space_init(&address_space_memory
, system_memory
, "memory");
2228 system_io
= g_malloc(sizeof(*system_io
));
2229 memory_region_init_io(system_io
, NULL
, &unassigned_io_ops
, NULL
, "io",
2231 address_space_init(&address_space_io
, system_io
, "I/O");
2234 MemoryRegion
*get_system_memory(void)
2236 return system_memory
;
2239 MemoryRegion
*get_system_io(void)
2244 #endif /* !defined(CONFIG_USER_ONLY) */
2246 /* physical memory access (slow version, mainly for debug) */
2247 #if defined(CONFIG_USER_ONLY)
2248 int cpu_memory_rw_debug(CPUState
*cpu
, target_ulong addr
,
2249 uint8_t *buf
, int len
, int is_write
)
2256 page
= addr
& TARGET_PAGE_MASK
;
2257 l
= (page
+ TARGET_PAGE_SIZE
) - addr
;
2260 flags
= page_get_flags(page
);
2261 if (!(flags
& PAGE_VALID
))
2264 if (!(flags
& PAGE_WRITE
))
2266 /* XXX: this code should not depend on lock_user */
2267 if (!(p
= lock_user(VERIFY_WRITE
, addr
, l
, 0)))
2270 unlock_user(p
, addr
, l
);
2272 if (!(flags
& PAGE_READ
))
2274 /* XXX: this code should not depend on lock_user */
2275 if (!(p
= lock_user(VERIFY_READ
, addr
, l
, 1)))
2278 unlock_user(p
, addr
, 0);
2289 static void invalidate_and_set_dirty(MemoryRegion
*mr
, hwaddr addr
,
2292 uint8_t dirty_log_mask
= memory_region_get_dirty_log_mask(mr
);
2293 /* No early return if dirty_log_mask is or becomes 0, because
2294 * cpu_physical_memory_set_dirty_range will still call
2295 * xen_modified_memory.
2297 if (dirty_log_mask
) {
2299 cpu_physical_memory_range_includes_clean(addr
, length
, dirty_log_mask
);
2301 if (dirty_log_mask
& (1 << DIRTY_MEMORY_CODE
)) {
2302 tb_invalidate_phys_range(addr
, addr
+ length
);
2303 dirty_log_mask
&= ~(1 << DIRTY_MEMORY_CODE
);
2305 cpu_physical_memory_set_dirty_range(addr
, length
, dirty_log_mask
);
2308 static int memory_access_size(MemoryRegion
*mr
, unsigned l
, hwaddr addr
)
2310 unsigned access_size_max
= mr
->ops
->valid
.max_access_size
;
2312 /* Regions are assumed to support 1-4 byte accesses unless
2313 otherwise specified. */
2314 if (access_size_max
== 0) {
2315 access_size_max
= 4;
2318 /* Bound the maximum access by the alignment of the address. */
2319 if (!mr
->ops
->impl
.unaligned
) {
2320 unsigned align_size_max
= addr
& -addr
;
2321 if (align_size_max
!= 0 && align_size_max
< access_size_max
) {
2322 access_size_max
= align_size_max
;
2326 /* Don't attempt accesses larger than the maximum. */
2327 if (l
> access_size_max
) {
2328 l
= access_size_max
;
2331 l
= 1 << (qemu_fls(l
) - 1);
2337 static bool prepare_mmio_access(MemoryRegion
*mr
)
2339 bool unlocked
= !qemu_mutex_iothread_locked();
2340 bool release_lock
= false;
2342 if (unlocked
&& mr
->global_locking
) {
2343 qemu_mutex_lock_iothread();
2345 release_lock
= true;
2347 if (mr
->flush_coalesced_mmio
) {
2349 qemu_mutex_lock_iothread();
2351 qemu_flush_coalesced_mmio_buffer();
2353 qemu_mutex_unlock_iothread();
2357 return release_lock
;
2360 MemTxResult
address_space_rw(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2361 uint8_t *buf
, int len
, bool is_write
)
2368 MemTxResult result
= MEMTX_OK
;
2369 bool release_lock
= false;
2374 mr
= address_space_translate(as
, addr
, &addr1
, &l
, is_write
);
2377 if (!memory_access_is_direct(mr
, is_write
)) {
2378 release_lock
|= prepare_mmio_access(mr
);
2379 l
= memory_access_size(mr
, l
, addr1
);
2380 /* XXX: could force current_cpu to NULL to avoid
2384 /* 64 bit write access */
2386 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 8,
2390 /* 32 bit write access */
2392 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 4,
2396 /* 16 bit write access */
2398 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 2,
2402 /* 8 bit write access */
2404 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 1,
2411 addr1
+= memory_region_get_ram_addr(mr
);
2413 ptr
= qemu_get_ram_ptr(addr1
);
2414 memcpy(ptr
, buf
, l
);
2415 invalidate_and_set_dirty(mr
, addr1
, l
);
2418 if (!memory_access_is_direct(mr
, is_write
)) {
2420 release_lock
|= prepare_mmio_access(mr
);
2421 l
= memory_access_size(mr
, l
, addr1
);
2424 /* 64 bit read access */
2425 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 8,
2430 /* 32 bit read access */
2431 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 4,
2436 /* 16 bit read access */
2437 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 2,
2442 /* 8 bit read access */
2443 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 1,
2452 ptr
= qemu_get_ram_ptr(mr
->ram_addr
+ addr1
);
2453 memcpy(buf
, ptr
, l
);
2458 qemu_mutex_unlock_iothread();
2459 release_lock
= false;
2471 MemTxResult
address_space_write(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2472 const uint8_t *buf
, int len
)
2474 return address_space_rw(as
, addr
, attrs
, (uint8_t *)buf
, len
, true);
2477 MemTxResult
address_space_read(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2478 uint8_t *buf
, int len
)
2480 return address_space_rw(as
, addr
, attrs
, buf
, len
, false);
2484 void cpu_physical_memory_rw(hwaddr addr
, uint8_t *buf
,
2485 int len
, int is_write
)
2487 address_space_rw(&address_space_memory
, addr
, MEMTXATTRS_UNSPECIFIED
,
2488 buf
, len
, is_write
);
2491 enum write_rom_type
{
2496 static inline void cpu_physical_memory_write_rom_internal(AddressSpace
*as
,
2497 hwaddr addr
, const uint8_t *buf
, int len
, enum write_rom_type type
)
2507 mr
= address_space_translate(as
, addr
, &addr1
, &l
, true);
2509 if (!(memory_region_is_ram(mr
) ||
2510 memory_region_is_romd(mr
))) {
2511 l
= memory_access_size(mr
, l
, addr1
);
2513 addr1
+= memory_region_get_ram_addr(mr
);
2515 ptr
= qemu_get_ram_ptr(addr1
);
2518 memcpy(ptr
, buf
, l
);
2519 invalidate_and_set_dirty(mr
, addr1
, l
);
2522 flush_icache_range((uintptr_t)ptr
, (uintptr_t)ptr
+ l
);
/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}

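/* A caller that has just patched guest code, for instance with
 * cpu_physical_memory_write_rom(), can make the change visible to KVM/Xen
 * vCPUs by flushing the host instruction cache over the same range
 * (illustrative sketch; 'gpa', 'code' and 'size' are placeholder names):
 *
 *     cpu_physical_memory_write_rom(&address_space_memory, gpa, code, size);
 *     cpu_flush_icache_range(gpa, size);
 */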
typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
    bool in_use;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    QEMUBH *bh;
    QLIST_ENTRY(MapClient) link;
} MapClient;

QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
    }
}

void cpu_register_map_client(QEMUBH *bh)
{
    MapClient *client = g_malloc(sizeof(*client));

    qemu_mutex_lock(&map_client_list_lock);
    client->bh = bh;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

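/* The registered bottom half is scheduled (and the client unregistered) as
 * soon as the single bounce buffer is released, so a typical caller retries
 * a failed mapping from its BH (illustrative sketch; 'retry_bh' stands for
 * the caller's own QEMUBH):
 *
 *     if (!address_space_map(as, addr, &plen, is_write)) {
 *         cpu_register_map_client(retry_bh);
 *     }
 */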
void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
    qemu_mutex_init(&map_client_list_lock);
}

void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &l, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        rcu_read_unlock();
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    rcu_read_unlock();
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}

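/* A mapping obtained from address_space_map() must be released with
 * address_space_unmap() below, passing the number of bytes that were really
 * accessed so the dirty bitmap can be updated for writes.  Illustrative
 * sketch ('gpa' and 'size' are placeholder names):
 *
 *     hwaddr plen = size;
 *     void *p = address_space_map(as, gpa, &plen, true);
 *     if (p) {
 *         memset(p, 0, plen);
 *         address_space_unmap(as, p, plen, true, plen);
 *     }
 */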
/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(mr, addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

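/* The *_phys() helpers above and below are thin convenience wrappers: they
 * target the caller-supplied AddressSpace with MEMTXATTRS_UNSPECIFIED and
 * discard the MemTxResult.  The unsuffixed forms use the target's native
 * endianness, while the _le/_be forms byte-swap as needed, e.g. (sketch;
 * 'gpa' is a placeholder name):
 *
 *     uint32_t v = ldl_le_phys(&address_space_memory, gpa);
 *
 * reads a little-endian 32-bit value regardless of TARGET_WORDS_BIGENDIAN.
 */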
/* warning: addr must be aligned */
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &val, 1, 0);
    if (result) {
        *result = r;
    }
    return val;
}

uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}

uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
                                MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    uint8_t dirty_log_mask;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

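/* stl_phys_notdirty() is intended for cases where the dirty bits are being
 * maintained from the value written itself, e.g. target MMU code setting
 * accessed/dirty flags in a guest PTE: the DIRTY_MEMORY_CODE bit is masked
 * out above, so translated code covering the page is not invalidated.
 */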
/* warning: addr must be aligned */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
        } else {
            address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

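/* cpu_memory_rw_debug() operates on guest virtual addresses by walking the
 * guest page tables via cpu_get_phys_page_debug().  Writes go through
 * cpu_physical_memory_write_rom() so that debuggers (such as the gdbstub)
 * can also patch pages the guest maps read-only.
 */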
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}

int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}
#endif