exec.c

   1 /*
   2  *  Virtual page mapping
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "config.h"
  20 #ifndef _WIN32
  21 #include <sys/types.h>
  22 #include <sys/mman.h>
  23 #endif
  24
  25 #include "qemu-common.h"
  26 #include "cpu.h"
  27 #include "tcg.h"
  28 #include "hw/hw.h"
  29 #if !defined(CONFIG_USER_ONLY)
  30 #include "hw/boards.h"
  31 #endif
  32 #include "hw/qdev.h"
  33 #include "qemu/osdep.h"
  34 #include "sysemu/kvm.h"
  35 #include "sysemu/sysemu.h"
  36 #include "hw/xen/xen.h"
  37 #include "qemu/timer.h"
  38 #include "qemu/config-file.h"
  39 #include "qemu/error-report.h"
  40 #include "exec/memory.h"
  41 #include "sysemu/dma.h"
  42 #include "exec/address-spaces.h"
  43 #if defined(CONFIG_USER_ONLY)
  44 #include <qemu.h>
  45 #else /* !CONFIG_USER_ONLY */
  46 #include "sysemu/xen-mapcache.h"
  47 #include "trace.h"
  48 #endif
  49 #include "exec/cpu-all.h"
  50 #include "qemu/rcu_queue.h"
  51 #include "qemu/main-loop.h"
  52 #include "translate-all.h"
  53 #include "sysemu/replay.h"
  54 #include "sysemu/qtest.h"
  55
  56 #include "exec/memory-internal.h"
  57 #include "exec/ram_addr.h"
  58
  59 #include "qemu/range.h"
  60 #ifndef _WIN32
  61 #include "qemu/mmap-alloc.h"
  62 #endif
  63
  64 //#define DEBUG_SUBPAGE
  65
  66 #if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

/* Root memory regions.  They are not assigned in this part of the file
 * (presumably set up by memory_map_init() -- confirm).
 */
static MemoryRegion *system_memory;
static MemoryRegion *system_io;

/* Global address spaces.  In system-emulation builds cpu_exec_init()
 * points cpu->as at address_space_memory.
 */
AddressSpace address_space_io;
AddressSpace address_space_memory;

/* Special-purpose regions.  io_mem_rom and io_mem_notdirty are excluded by
 * memory_region_is_unassigned(); io_mem_unassigned is what
 * address_space_translate() returns when an IOMMU denies the access.
 */
MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;
  80
/* RAM block flag bits.  Each flag occupies its own bit so that several can
 * be combined in one value.
 */

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

/* RAM is backed by an mmapped file.
 */
#define RAM_FILE (1 << 3)
  95 #endif
  96
/* List of all CPUs; cpu_exec_init() appends each new CPU to its tail. */
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;
 105
 106 #if !defined(CONFIG_USER_ONLY)
 107
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the physical page radix tree: two bit-fields totalling
 * 32 bits.  An entry with skip == 0 is a leaf.
 */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
 116
/* Sentinel for PhysPageEntry.ptr meaning "no node allocated": all 26 bits
 * of the ptr field set.
 */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Width in bits of the address space covered by the map. */
#define ADDR_SPACE_BITS 64

/* Size of the L2 (and L3, etc) page tables: every level decodes P_L2_BITS
 * bits of the page index, so each node has P_L2_SIZE entries.
 */
#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

/* Number of levels needed to decode the ADDR_SPACE_BITS - TARGET_PAGE_BITS
 * bits of a page index, P_L2_BITS at a time, rounded up.
 */
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

/* One node of the tree: an array of P_L2_SIZE entries. */
typedef PhysPageEntry Node[P_L2_SIZE];
 128
/*
 * Backing storage for a page radix tree: a growable array of nodes and an
 * array of MemoryRegionSections that leaf entries index into.
 */
typedef struct PhysPageMap {
    struct rcu_head rcu;

    /* Presumably the used/allocated counts for 'sections', mirroring the
     * node counters below -- their users are not in this part of the file.
     */
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    /* Number of nodes handed out so far (bumped by phys_map_node_alloc()). */
    unsigned nodes_nb;
    /* Capacity of 'nodes' (grown by phys_map_node_reserve()). */
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;
 139
/*
 * Lookup structure for one AddressSpace: the root entry of the radix tree
 * plus the node/section storage that the tree indexes into.
 */
struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;   /* root entry of the tree */
    PhysPageMap map;          /* nodes and sections referenced by the tree */
    AddressSpace *as;
};
 150
/* Offset of @addr within its target page. */
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/*
 * A page that is split between several sections.  sub_section[] has one
 * entry per byte offset within the page; address_space_lookup_region() uses
 * the entry as an index into the dispatch's sections array.
 */
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;
 158
/* Fixed section indices.  PHYS_SECTION_UNASSIGNED is the default for
 * unmapped addresses; the others are combined into iotlb values by
 * memory_region_section_get_iotlb().
 */
#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

/* Forward declarations; the definitions are not in this part of the file. */
static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

/* Special region excluded by memory_region_is_unassigned(). */
static MemoryRegion io_mem_watch;
 169
/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 *
 * tcg_cpu_address_space_init() allocates one zeroed instance per CPU, sets
 * the listener's commit hook to tcg_commit and registers the listener.
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};
 183
 184 #endif
 185
 186 #if !defined(CONFIG_USER_ONLY)
 187
 188 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 189 {
 190     if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 191         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
 192         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 193         map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 194     }
 195 }
 196
 197 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 198 {
 199     unsigned i;
 200     uint32_t ret;
 201     PhysPageEntry e;
 202     PhysPageEntry *p;
 203
 204     ret = map->nodes_nb++;
 205     p = map->nodes[ret];
 206     assert(ret != PHYS_MAP_NODE_NIL);
 207     assert(ret != map->nodes_nb_alloc);
 208
 209     e.skip = leaf ? 0 : 1;
 210     e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 211     for (i = 0; i < P_L2_SIZE; ++i) {
 212         memcpy(&p[i], &e, sizeof(e));
 213     }
 214     return ret;
 215 }
 216
/*
 * Recursive worker for phys_page_set(): point the pages starting at *index
 * and spanning *nb pages at section index @leaf, working inside the node
 * referenced by @lp at tree level @level (level 0 is the bottom level).
 *
 * @index and @nb are in/out cursors: whenever a run of pages is mapped,
 * *index is advanced and *nb reduced by the same amount, so when a
 * recursive call returns the caller continues from where the callee stopped.
 */
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    /* Number of pages covered by a single entry at this level. */
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    /* Allocate the child node on first use; a level-0 node holds leaves. */
    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    /* Entry of that node selected by this level's bits of *index. */
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    /* Walk forward until the range is used up or the node ends. */
    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            /* Aligned and fully covered: the whole entry becomes a leaf. */
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            /* Partially covered: descend one level to map the pieces. */
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}
 242
 243 static void phys_page_set(AddressSpaceDispatch *d,
 244                           hwaddr index, hwaddr nb,
 245                           uint16_t leaf)
 246 {
 247     /* Wildly overreserve - it doesn't matter much. */
 248     phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 249
 250     phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 251 }
 252
/* Compact a non leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 *
 * @lp:        entry to compact; entries with no node allocated are left alone
 * @nodes:     node array that lp->ptr indexes into
 * @compacted: not read or written here, only handed on to the recursive calls
 *
 * Children are compacted first (depth-first), so by the time @lp is examined
 * its only child has already been shortened as far as possible.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    /* Index of the last populated child; P_L2_SIZE means "none seen yet". */
    unsigned valid_ptr = P_L2_SIZE;
    /* Number of populated children. */
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    /* NOTE(review): the limit used is 1 << 3 although the skip field is
     * declared 6 bits wide -- confirm the tighter bound is intended.
     */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    /* Bypass the single-child node: point straight at the grandchild. */
    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}
 305
 306 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 307 {
 308     DECLARE_BITMAP(compacted, nodes_nb);
 309
 310     if (d->phys_map.skip) {
 311         phys_page_compact(&d->phys_map, d->map.nodes, compacted);
 312     }
 313 }
 314
/*
 * Find the section that covers physical address @addr.
 *
 * @lp:       root entry of the tree (passed by value and used as a cursor)
 * @nodes:    node array that non-leaf entries index into
 * @sections: section array that leaf entries index into
 *
 * Returns &sections[PHYS_SECTION_UNASSIGNED] when the walk reaches an entry
 * with no node allocated, or when the section found does not actually
 * contain @addr.
 */
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    /* Descend while lp is not a leaf; each step moves down lp.skip levels,
     * which is how compacted (multi-level) skips are followed.
     */
    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    /* A section whose 128-bit size has a non-zero high half is accepted
     * without a range check; otherwise verify that addr lies inside it.
     */
    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}
 338
 339 bool memory_region_is_unassigned(MemoryRegion *mr)
 340 {
 341     return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 342         && mr != &io_mem_watch;
 343 }
 344
 345 /* Called from RCU critical section */
 346 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 347                                                         hwaddr addr,
 348                                                         bool resolve_subpage)
 349 {
 350     MemoryRegionSection *section;
 351     subpage_t *subpage;
 352
 353     section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
 354     if (resolve_subpage && section->mr->subpage) {
 355         subpage = container_of(section->mr, subpage_t, iomem);
 356         section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 357     }
 358     return section;
 359 }
 360
 361 /* Called from RCU critical section */
 362 static MemoryRegionSection *
 363 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 364                                  hwaddr *plen, bool resolve_subpage)
 365 {
 366     MemoryRegionSection *section;
 367     MemoryRegion *mr;
 368     Int128 diff;
 369
 370     section = address_space_lookup_region(d, addr, resolve_subpage);
 371     /* Compute offset within MemoryRegionSection */
 372     addr -= section->offset_within_address_space;
 373
 374     /* Compute offset within MemoryRegion */
 375     *xlat = addr + section->offset_within_region;
 376
 377     mr = section->mr;
 378
 379     /* MMIO registers can be expected to perform full-width accesses based only
 380      * on their address, without considering adjacent registers that could
 381      * decode to completely different MemoryRegions.  When such registers
 382      * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 383      * regions overlap wildly.  For this reason we cannot clamp the accesses
 384      * here.
 385      *
 386      * If the length is small (as is the case for address_space_ldl/stl),
 387      * everything works fine.  If the incoming length is large, however,
 388      * the caller really has to do the clamping through memory_access_size.
 389      */
 390     if (memory_region_is_ram(mr)) {
 391         diff = int128_sub(section->size, int128_make64(addr));
 392         *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 393     }
 394     return section;
 395 }
 396
 397 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
 398 {
 399     if (memory_region_is_ram(mr)) {
 400         return !(is_write && mr->readonly);
 401     }
 402     if (memory_region_is_romd(mr)) {
 403         return !is_write;
 404     }
 405
 406     return false;
 407 }
 408
/*
 * Translate @addr in address space @as down to a MemoryRegion, following
 * any IOMMU regions on the way.
 *
 * @xlat:     out: offset of the access within the returned region
 * @plen:     in/out: access length, possibly reduced so that the access
 *            stays within one section / IOMMU page (and, under Xen, one
 *            target page for direct accesses)
 * @is_write: whether the access is a write; selects the permission bit
 *            checked against the IOMMU entry
 *
 * Returns the target region, or &io_mem_unassigned when an IOMMU entry does
 * not grant the requested permission.
 *
 * Called from RCU critical section.
 */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;

    /* Each iteration resolves addr in the current address space; the loop
     * ends at the first region that is not an IOMMU.
     */
    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        /* Bits outside addr_mask come from the translation, bits inside it
         * are kept from the incoming address.
         */
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        /* Do not let the access run past the end of the IOMMU page. */
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        /* Permission bit 0 is tested for reads, bit 1 for writes. */
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    /* Under Xen, a direct access is cut at the next target page boundary. */
    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    *xlat = addr;
    return mr;
}
 447
 448 /* Called from RCU critical section */
 449 MemoryRegionSection *
 450 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
 451                                   hwaddr *xlat, hwaddr *plen)
 452 {
 453     MemoryRegionSection *section;
 454     section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
 455                                                addr, xlat, plen, false);
 456
 457     assert(!section->mr->iommu_ops);
 458     return section;
 459 }
 460 #endif
 461
 462 #if !defined(CONFIG_USER_ONLY)
 463
 464 static int cpu_common_post_load(void *opaque, int version_id)
 465 {
 466     CPUState *cpu = opaque;
 467
 468     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 469        version_id is increased. */
 470     cpu->interrupt_request &= ~0x01;
 471     tlb_flush(cpu, 1);
 472
 473     return 0;
 474 }
 475
 476 static int cpu_common_pre_load(void *opaque)
 477 {
 478     CPUState *cpu = opaque;
 479
 480     cpu->exception_index = -1;
 481
 482     return 0;
 483 }
 484
 485 static bool cpu_common_exception_index_needed(void *opaque)
 486 {
 487     CPUState *cpu = opaque;
 488
 489     return tcg_enabled() && cpu->exception_index != -1;
 490 }
 491
/* Optional subsection carrying CPUState.exception_index; it is included
 * only when cpu_common_exception_index_needed() returns true.
 */
static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
 502
 503 static bool cpu_common_crash_occurred_needed(void *opaque)
 504 {
 505     CPUState *cpu = opaque;
 506
 507     return cpu->crash_occurred;
 508 }
 509
/* Optional subsection carrying CPUState.crash_occurred; it is included only
 * when cpu_common_crash_occurred_needed() returns true.
 */
static const VMStateDescription vmstate_cpu_common_crash_occurred = {
    .name = "cpu_common/crash_occurred",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_crash_occurred_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(crash_occurred, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
 520
/*
 * Migration description of the target-independent CPU state: the halted
 * and interrupt_request fields, plus two optional subsections.
 * cpu_exec_init() registers it for CPUs whose device has no vmsd of its own.
 */
const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    /* NULL-terminated list of optional subsections. */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        &vmstate_cpu_common_crash_occurred,
        NULL
    }
};
 538
 539 #endif
 540
 541 CPUState *qemu_get_cpu(int index)
 542 {
 543     CPUState *cpu;
 544
 545     CPU_FOREACH(cpu) {
 546         if (cpu->cpu_index == index) {
 547             return cpu;
 548         }
 549     }
 550
 551     return NULL;
 552 }
 553
 554 #if !defined(CONFIG_USER_ONLY)
 555 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
 556 {
 557     /* We only support one address space per cpu at the moment.  */
 558     assert(cpu->as == as);
 559
 560     if (cpu->cpu_ases) {
 561         /* We've already registered the listener for our only AS */
 562         return;
 563     }
 564
 565     cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
 566     cpu->cpu_ases[0].cpu = cpu;
 567     cpu->cpu_ases[0].as = as;
 568     cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
 569     memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
 570 }
 571 #endif
 572
 573 #ifndef CONFIG_USER_ONLY
 574 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
 575
 576 static int cpu_get_free_index(Error **errp)
 577 {
 578     int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
 579
 580     if (cpu >= MAX_CPUMASK_BITS) {
 581         error_setg(errp, "Trying to use more CPUs than max of %d",
 582                    MAX_CPUMASK_BITS);
 583         return -1;
 584     }
 585
 586     bitmap_set(cpu_index_map, cpu, 1);
 587     return cpu;
 588 }
 589
 590 void cpu_exec_exit(CPUState *cpu)
 591 {
 592     if (cpu->cpu_index == -1) {
 593         /* cpu_index was never allocated by this @cpu or was already freed. */
 594         return;
 595     }
 596
 597     bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
 598     cpu->cpu_index = -1;
 599 }
 600 #else
 601
 602 static int cpu_get_free_index(Error **errp)
 603 {
 604     CPUState *some_cpu;
 605     int cpu_index = 0;
 606
 607     CPU_FOREACH(some_cpu) {
 608         cpu_index++;
 609     }
 610     return cpu_index;
 611 }
 612
/* User-mode variant: intentionally empty, there is no index bitmap to
 * update in this configuration.
 */
void cpu_exec_exit(CPUState *cpu)
{
}
 616 #endif
 617
/*
 * Common initialisation of a newly created CPU: allocate its cpu_index, add
 * it to the global 'cpus' list and register its migration state.
 *
 * If no index can be obtained, the error is propagated to @errp and the
 * function returns before the CPU is added to the list or anything is
 * registered.
 */
void cpu_exec_init(CPUState *cpu, Error **errp)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    int cpu_index;
    Error *local_err = NULL;

#ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
#endif

    /* User-mode builds hold the CPU list lock across index allocation and
     * list insertion; both exit paths below release it.
     */
#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
    if (local_err) {
        error_propagate(errp, local_err);
#if defined(CONFIG_USER_ONLY)
        cpu_list_unlock();
#endif
        return;
    }
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    /* The generic CPU state is registered here only when the device does
     * not supply a vmsd of its own.
     */
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    /* Targets defining CPU_SAVE_VERSION use the cpu_save/cpu_load handlers
     * and must not provide a vmsd as well.
     */
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, cpu->env_ptr);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
    /* A class-level vmsd, when present, is registered as well. */
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}
 657
 658 #if defined(CONFIG_USER_ONLY)
/* User-mode variant: invalidate translated code for the one-byte range
 * [pc, pc + 1), using @pc directly as the address.
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
 663 #else
 664 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 665 {
 666     hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
 667     if (phys != -1) {
 668         tb_invalidate_phys_addr(cpu->as,
 669                                 phys | (pc & ~TARGET_PAGE_MASK));
 670     }
 671 }
 672 #endif
 673
 674 #if defined(CONFIG_USER_ONLY)
/* User-mode stub: does nothing (watchpoint insertion always fails with
 * -ENOSYS in this configuration, so there is never anything to remove).
 */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)

{
}
 679
/* User-mode stub: always fails with -ENOSYS. */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}
 685
/* User-mode stub: does nothing; @watchpoint is neither unlinked nor freed. */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}
 689
/* User-mode stub: always fails with -ENOSYS and leaves *@watchpoint
 * untouched.
 */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
 695 #else
 696 /* Add a watchpoint.  */
 697 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 698                           int flags, CPUWatchpoint **watchpoint)
 699 {
 700     CPUWatchpoint *wp;
 701
 702     /* forbid ranges which are empty or run off the end of the address space */
 703     if (len == 0 || (addr + len - 1) < addr) {
 704         error_report("tried to set invalid watchpoint at %"
 705                      VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 706         return -EINVAL;
 707     }
 708     wp = g_malloc(sizeof(*wp));
 709
 710     wp->vaddr = addr;
 711     wp->len = len;
 712     wp->flags = flags;
 713
 714     /* keep all GDB-injected watchpoints in front */
 715     if (flags & BP_GDB) {
 716         QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 717     } else {
 718         QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 719     }
 720
 721     tlb_flush_page(cpu, addr);
 722
 723     if (watchpoint)
 724         *watchpoint = wp;
 725     return 0;
 726 }
 727
 728 /* Remove a specific watchpoint.  */
 729 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 730                           int flags)
 731 {
 732     CPUWatchpoint *wp;
 733
 734     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 735         if (addr == wp->vaddr && len == wp->len
 736                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 737             cpu_watchpoint_remove_by_ref(cpu, wp);
 738             return 0;
 739         }
 740     }
 741     return -ENOENT;
 742 }
 743
 744 /* Remove a specific watchpoint by reference.  */
 745 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 746 {
 747     QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 748
 749     tlb_flush_page(cpu, watchpoint->vaddr);
 750
 751     g_free(watchpoint);
 752 }
 753
 754 /* Remove all matching watchpoints.  */
 755 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 756 {
 757     CPUWatchpoint *wp, *next;
 758
 759     QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 760         if (wp->flags & mask) {
 761             cpu_watchpoint_remove_by_ref(cpu, wp);
 762         }
 763     }
 764 }
 765
 766 /* Return true if this watchpoint address matches the specified
 767  * access (ie the address range covered by the watchpoint overlaps
 768  * partially or completely with the address range covered by the
 769  * access).
 770  */
 771 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 772                                                   vaddr addr,
 773                                                   vaddr len)
 774 {
 775     /* We know the lengths are non-zero, but a little caution is
 776      * required to avoid errors in the case where the range ends
 777      * exactly at the top of the address space and so addr + len
 778      * wraps round to zero.
 779      */
 780     vaddr wpend = wp->vaddr + wp->len - 1;
 781     vaddr addrend = addr + len - 1;
 782
 783     return !(addr > wpend || wp->vaddr > addrend);
 784 }
 785
 786 #endif
 787
 788 /* Add a breakpoint.  */
 789 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 790                           CPUBreakpoint **breakpoint)
 791 {
 792     CPUBreakpoint *bp;
 793
 794     bp = g_malloc(sizeof(*bp));
 795
 796     bp->pc = pc;
 797     bp->flags = flags;
 798
 799     /* keep all GDB-injected breakpoints in front */
 800     if (flags & BP_GDB) {
 801         QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 802     } else {
 803         QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 804     }
 805
 806     breakpoint_invalidate(cpu, pc);
 807
 808     if (breakpoint) {
 809         *breakpoint = bp;
 810     }
 811     return 0;
 812 }
 813
 814 /* Remove a specific breakpoint.  */
 815 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 816 {
 817     CPUBreakpoint *bp;
 818
 819     QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 820         if (bp->pc == pc && bp->flags == flags) {
 821             cpu_breakpoint_remove_by_ref(cpu, bp);
 822             return 0;
 823         }
 824     }
 825     return -ENOENT;
 826 }
 827
 828 /* Remove a specific breakpoint by reference.  */
 829 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 830 {
 831     QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 832
 833     breakpoint_invalidate(cpu, breakpoint->pc);
 834
 835     g_free(breakpoint);
 836 }
 837
 838 /* Remove all matching breakpoints. */
 839 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 840 {
 841     CPUBreakpoint *bp, *next;
 842
 843     QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 844         if (bp->flags & mask) {
 845             cpu_breakpoint_remove_by_ref(cpu, bp);
 846         }
 847     }
 848 }
 849
 850 /* enable or disable single step mode. EXCP_DEBUG is returned by the
 851    CPU loop after each instruction */
 852 void cpu_single_step(CPUState *cpu, int enabled)
 853 {
 854     if (cpu->singlestep_enabled != enabled) {
 855         cpu->singlestep_enabled = enabled;
 856         if (kvm_enabled()) {
 857             kvm_update_guest_debug(cpu, 0);
 858         } else {
 859             /* must flush all the translated code to avoid inconsistencies */
 860             /* XXX: only flush what is necessary */
 861             tb_flush(cpu);
 862         }
 863     }
 864 }
 865
 866 void cpu_abort(CPUState *cpu, const char *fmt, ...)
 867 {
 868     va_list ap;
 869     va_list ap2;
 870
 871     va_start(ap, fmt);
 872     va_copy(ap2, ap);
 873     fprintf(stderr, "qemu: fatal: ");
 874     vfprintf(stderr, fmt, ap);
 875     fprintf(stderr, "\n");
 876     cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 877     if (qemu_log_enabled()) {
 878         qemu_log("qemu: fatal: ");
 879         qemu_log_vprintf(fmt, ap2);
 880         qemu_log("\n");
 881         log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 882         qemu_log_flush();
 883         qemu_log_close();
 884     }
 885     va_end(ap2);
 886     va_end(ap);
 887     replay_finish();
 888 #if defined(CONFIG_USER_ONLY)
 889     {
 890         struct sigaction act;
 891         sigfillset(&act.sa_mask);
 892         act.sa_handler = SIG_DFL;
 893         sigaction(SIGABRT, &act, NULL);
 894     }
 895 #endif
 896     abort();
 897 }
 898
 899 #if !defined(CONFIG_USER_ONLY)
/*
 * Return the RAMBlock containing ram address @addr.
 *
 * The most recently used block is tried first, then the block list is
 * scanned.  A hit from the scan becomes the new most recently used block.
 * If no block contains @addr, an error is printed and the process aborts,
 * so the function never returns NULL.
 *
 * Called from RCU critical section.
 */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    /* Single-comparison range check: when addr lies below block->offset the
     * subtraction wraps to a huge value (this relies on ram_addr_t being an
     * unsigned type -- confirm) and the test fails.
     */
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}
 938
 939 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 940 {
 941     CPUState *cpu;
 942     ram_addr_t start1;
 943     RAMBlock *block;
 944     ram_addr_t end;
 945
 946     end = TARGET_PAGE_ALIGN(start + length);
 947     start &= TARGET_PAGE_MASK;
 948
 949     rcu_read_lock();
 950     block = qemu_get_ram_block(start);
 951     assert(block == qemu_get_ram_block(end - 1));
 952     start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
 953     CPU_FOREACH(cpu) {
 954         tlb_reset_dirty(cpu, start1, length);
 955     }
 956     rcu_read_unlock();
 957 }
 958
 959 /* Note: start and end must be within the same ram block.  */
 960 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
 961                                               ram_addr_t length,
 962                                               unsigned client)
 963 {
 964     unsigned long end, page;
 965     bool dirty;
 966
 967     if (length == 0) {
 968         return false;
 969     }
 970
 971     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
 972     page = start >> TARGET_PAGE_BITS;
 973     dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
 974                                          page, end - page);
 975
 976     if (dirty && tcg_enabled()) {
 977         tlb_reset_dirty_range_all(start, length);
 978     }
 979
 980     return dirty;
 981 }
 982
/*
 * Compute the iotlb value for a mapping of @vaddr onto @section.
 *
 * @xlat:    offset that is added into the resulting value
 * @paddr:   physical address, used only when a watchpoint applies
 * @prot:    page protection bits; PAGE_WRITE is consulted for watchpoints
 * @address: in/out TLB address word; TLB_MMIO is OR-ed in when accesses
 *           must go through the watchpoint path
 *
 * Called from RCU critical section.
 */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        /* Page-aligned ram address plus xlat, with one of the fixed section
         * indices OR-ed in depending on whether the section is read-only.
         */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        AddressSpaceDispatch *d;

        /* Anything else: index of the section within its dispatch's
         * sections array, plus xlat.
         */
        d = atomic_rcu_read(&section->address_space->dispatch);
        iotlb = section - d->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                /* The first applicable watchpoint decides; the value
                 * computed above is replaced.
                 */
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
1026 #endif /* defined(CONFIG_USER_ONLY) */
1027
1028 #if !defined(CONFIG_USER_ONLY)
1029
1030 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1031                              uint16_t section);
1032 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1033
1034 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1035                                qemu_anon_ram_alloc;
1036
1037 /*
1038  * Set a custom physical guest memory alloator.
1039  * Accelerators with unusual needs may need this.  Hopefully, we can
1040  * get rid of it eventually.
1041  */
1042 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1043 {
1044     phys_mem_alloc = alloc;
1045 }
1046
1047 static uint16_t phys_section_add(PhysPageMap *map,
1048                                  MemoryRegionSection *section)
1049 {
1050     /* The physical section number is ORed with a page-aligned
1051      * pointer to produce the iotlb entries.  Thus it should
1052      * never overflow into the page-aligned value.
1053      */
1054     assert(map->sections_nb < TARGET_PAGE_SIZE);
1055
1056     if (map->sections_nb == map->sections_nb_alloc) {
1057         map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1058         map->sections = g_renew(MemoryRegionSection, map->sections,
1059                                 map->sections_nb_alloc);
1060     }
1061     map->sections[map->sections_nb] = *section;
1062     memory_region_ref(section->mr);
1063     return map->sections_nb++;
1064 }
1065
1066 static void phys_section_destroy(MemoryRegion *mr)
1067 {
1068     memory_region_unref(mr);
1069
1070     if (mr->subpage) {
1071         subpage_t *subpage = container_of(mr, subpage_t, iomem);
1072         object_unref(OBJECT(&subpage->iomem));
1073         g_free(subpage);
1074     }
1075 }
1076
1077 static void phys_sections_free(PhysPageMap *map)
1078 {
1079     while (map->sections_nb > 0) {
1080         MemoryRegionSection *section = &map->sections[--map->sections_nb];
1081         phys_section_destroy(section->mr);
1082     }
1083     g_free(map->sections);
1084     g_free(map->nodes);
1085 }
1086
1087 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1088 {
1089     subpage_t *subpage;
1090     hwaddr base = section->offset_within_address_space
1091         & TARGET_PAGE_MASK;
1092     MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1093                                                    d->map.nodes, d->map.sections);
1094     MemoryRegionSection subsection = {
1095         .offset_within_address_space = base,
1096         .size = int128_make64(TARGET_PAGE_SIZE),
1097     };
1098     hwaddr start, end;
1099
1100     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1101
1102     if (!(existing->mr->subpage)) {
1103         subpage = subpage_init(d->as, base);
1104         subsection.address_space = d->as;
1105         subsection.mr = &subpage->iomem;
1106         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1107                       phys_section_add(&d->map, &subsection));
1108     } else {
1109         subpage = container_of(existing->mr, subpage_t, iomem);
1110     }
1111     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1112     end = start + int128_get64(section->size) - 1;
1113     subpage_register(subpage, start, end,
1114                      phys_section_add(&d->map, section));
1115 }
1116
1117
1118 static void register_multipage(AddressSpaceDispatch *d,
1119                                MemoryRegionSection *section)
1120 {
1121     hwaddr start_addr = section->offset_within_address_space;
1122     uint16_t section_index = phys_section_add(&d->map, section);
1123     uint64_t num_pages = int128_get64(int128_rshift(section->size,
1124                                                     TARGET_PAGE_BITS));
1125
1126     assert(num_pages);
1127     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1128 }
1129
1130 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1131 {
1132     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1133     AddressSpaceDispatch *d = as->next_dispatch;
1134     MemoryRegionSection now = *section, remain = *section;
1135     Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1136
1137     if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1138         uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1139                        - now.offset_within_address_space;
1140
1141         now.size = int128_min(int128_make64(left), now.size);
1142         register_subpage(d, &now);
1143     } else {
1144         now.size = int128_zero();
1145     }
1146     while (int128_ne(remain.size, now.size)) {
1147         remain.size = int128_sub(remain.size, now.size);
1148         remain.offset_within_address_space += int128_get64(now.size);
1149         remain.offset_within_region += int128_get64(now.size);
1150         now = remain;
1151         if (int128_lt(remain.size, page_size)) {
1152             register_subpage(d, &now);
1153         } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1154             now.size = page_size;
1155             register_subpage(d, &now);
1156         } else {
1157             now.size = int128_and(now.size, int128_neg(page_size));
1158             register_multipage(d, &now);
1159         }
1160     }
1161 }
1162
/* Flush the KVM coalesced MMIO buffer; does nothing unless KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1168
1169 void qemu_mutex_lock_ramlist(void)
1170 {
1171     qemu_mutex_lock(&ram_list.mutex);
1172 }
1173
1174 void qemu_mutex_unlock_ramlist(void)
1175 {
1176     qemu_mutex_unlock(&ram_list.mutex);
1177 }
1178
1179 #ifdef __linux__
1180
1181 #include <sys/vfs.h>
1182
1183 #define HUGETLBFS_MAGIC       0x958458f6
1184
1185 static long gethugepagesize(const char *path, Error **errp)
1186 {
1187     struct statfs fs;
1188     int ret;
1189
1190     do {
1191         ret = statfs(path, &fs);
1192     } while (ret != 0 && errno == EINTR);
1193
1194     if (ret != 0) {
1195         error_setg_errno(errp, errno, "failed to get page size of file %s",
1196                          path);
1197         return 0;
1198     }
1199
1200     if (!qtest_driver() &&
1201         fs.f_type != HUGETLBFS_MAGIC) {
1202         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1203     }
1204
1205     return fs.f_bsize;
1206 }
1207
1208 static void *file_ram_alloc(RAMBlock *block,
1209                             ram_addr_t memory,
1210                             const char *path,
1211                             Error **errp)
1212 {
1213     struct stat st;
1214     char *filename;
1215     char *sanitized_name;
1216     char *c;
1217     void *area;
1218     int fd;
1219     uint64_t hpagesize;
1220     Error *local_err = NULL;
1221
1222     hpagesize = gethugepagesize(path, &local_err);
1223     if (local_err) {
1224         error_propagate(errp, local_err);
1225         goto error;
1226     }
1227     block->mr->align = hpagesize;
1228
1229     if (memory < hpagesize) {
1230         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1231                    "or larger than huge page size 0x%" PRIx64,
1232                    memory, hpagesize);
1233         goto error;
1234     }
1235
1236     if (kvm_enabled() && !kvm_has_sync_mmu()) {
1237         error_setg(errp,
1238                    "host lacks kvm mmu notifiers, -mem-path unsupported");
1239         goto error;
1240     }
1241
1242     if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1243         /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1244         sanitized_name = g_strdup(memory_region_name(block->mr));
1245         for (c = sanitized_name; *c != '\0'; c++) {
1246             if (*c == '/') {
1247                 *c = '_';
1248             }
1249         }
1250
1251         filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1252                                    sanitized_name);
1253         g_free(sanitized_name);
1254
1255         fd = mkstemp(filename);
1256         if (fd >= 0) {
1257             unlink(filename);
1258         }
1259         g_free(filename);
1260     } else {
1261         fd = open(path, O_RDWR | O_CREAT, 0644);
1262     }
1263
1264     if (fd < 0) {
1265         error_setg_errno(errp, errno,
1266                          "unable to create backing store for hugepages");
1267         goto error;
1268     }
1269
1270     memory = ROUND_UP(memory, hpagesize);
1271
1272     /*
1273      * ftruncate is not supported by hugetlbfs in older
1274      * hosts, so don't bother bailing out on errors.
1275      * If anything goes wrong with it under other filesystems,
1276      * mmap will fail.
1277      */
1278     if (ftruncate(fd, memory)) {
1279         perror("ftruncate");
1280     }
1281
1282     area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1283     if (area == MAP_FAILED) {
1284         error_setg_errno(errp, errno,
1285                          "unable to map backing store for hugepages");
1286         close(fd);
1287         goto error;
1288     }
1289
1290     if (mem_prealloc) {
1291         os_mem_prealloc(fd, area, memory);
1292     }
1293
1294     block->fd = fd;
1295     return area;
1296
1297 error:
1298     return NULL;
1299 }
1300 #endif
1301
1302 /* Called with the ramlist lock held.  */
1303 static ram_addr_t find_ram_offset(ram_addr_t size)
1304 {
1305     RAMBlock *block, *next_block;
1306     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1307
1308     assert(size != 0); /* it would hand out same offset multiple times */
1309
1310     if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1311         return 0;
1312     }
1313
1314     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1315         ram_addr_t end, next = RAM_ADDR_MAX;
1316
1317         end = block->offset + block->max_length;
1318
1319         QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1320             if (next_block->offset >= end) {
1321                 next = MIN(next, next_block->offset);
1322             }
1323         }
1324         if (next - end >= size && next - end < mingap) {
1325             offset = end;
1326             mingap = next - end;
1327         }
1328     }
1329
1330     if (offset == RAM_ADDR_MAX) {
1331         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1332                 (uint64_t)size);
1333         abort();
1334     }
1335
1336     return offset;
1337 }
1338
1339 ram_addr_t last_ram_offset(void)
1340 {
1341     RAMBlock *block;
1342     ram_addr_t last = 0;
1343
1344     rcu_read_lock();
1345     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1346         last = MAX(last, block->offset + block->max_length);
1347     }
1348     rcu_read_unlock();
1349     return last;
1350 }
1351
1352 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1353 {
1354     int ret;
1355
1356     /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1357     if (!machine_dump_guest_core(current_machine)) {
1358         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1359         if (ret) {
1360             perror("qemu_madvise");
1361             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1362                             "but dump_guest_core=off specified\n");
1363         }
1364     }
1365 }
1366
1367 /* Called within an RCU critical section, or while the ramlist lock
1368  * is held.
1369  */
1370 static RAMBlock *find_ram_block(ram_addr_t addr)
1371 {
1372     RAMBlock *block;
1373
1374     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1375         if (block->offset == addr) {
1376             return block;
1377         }
1378     }
1379
1380     return NULL;
1381 }
1382
1383 const char *qemu_ram_get_idstr(RAMBlock *rb)
1384 {
1385     return rb->idstr;
1386 }
1387
1388 /* Called with iothread lock held.  */
1389 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1390 {
1391     RAMBlock *new_block, *block;
1392
1393     rcu_read_lock();
1394     new_block = find_ram_block(addr);
1395     assert(new_block);
1396     assert(!new_block->idstr[0]);
1397
1398     if (dev) {
1399         char *id = qdev_get_dev_path(dev);
1400         if (id) {
1401             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1402             g_free(id);
1403         }
1404     }
1405     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1406
1407     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1408         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1409             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1410                     new_block->idstr);
1411             abort();
1412         }
1413     }
1414     rcu_read_unlock();
1415 }
1416
1417 /* Called with iothread lock held.  */
1418 void qemu_ram_unset_idstr(ram_addr_t addr)
1419 {
1420     RAMBlock *block;
1421
1422     /* FIXME: arch_init.c assumes that this is not called throughout
1423      * migration.  Ignore the problem since hot-unplug during migration
1424      * does not work anyway.
1425      */
1426
1427     rcu_read_lock();
1428     block = find_ram_block(addr);
1429     if (block) {
1430         memset(block->idstr, 0, sizeof(block->idstr));
1431     }
1432     rcu_read_unlock();
1433 }
1434
1435 static int memory_try_enable_merging(void *addr, size_t len)
1436 {
1437     if (!machine_mem_merge(current_machine)) {
1438         /* disabled by the user */
1439         return 0;
1440     }
1441
1442     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1443 }
1444
1445 /* Only legal before guest might have detected the memory size: e.g. on
1446  * incoming migration, or right after reset.
1447  *
1448  * As memory core doesn't know how is memory accessed, it is up to
1449  * resize callback to update device state and/or add assertions to detect
1450  * misuse, if necessary.
1451  */
1452 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1453 {
1454     RAMBlock *block = find_ram_block(base);
1455
1456     assert(block);
1457
1458     newsize = HOST_PAGE_ALIGN(newsize);
1459
1460     if (block->used_length == newsize) {
1461         return 0;
1462     }
1463
1464     if (!(block->flags & RAM_RESIZEABLE)) {
1465         error_setg_errno(errp, EINVAL,
1466                          "Length mismatch: %s: 0x" RAM_ADDR_FMT
1467                          " in != 0x" RAM_ADDR_FMT, block->idstr,
1468                          newsize, block->used_length);
1469         return -EINVAL;
1470     }
1471
1472     if (block->max_length < newsize) {
1473         error_setg_errno(errp, EINVAL,
1474                          "Length too large: %s: 0x" RAM_ADDR_FMT
1475                          " > 0x" RAM_ADDR_FMT, block->idstr,
1476                          newsize, block->max_length);
1477         return -EINVAL;
1478     }
1479
1480     cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1481     block->used_length = newsize;
1482     cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1483                                         DIRTY_CLIENTS_ALL);
1484     memory_region_set_size(block->mr, newsize);
1485     if (block->resized) {
1486         block->resized(block->idstr, newsize, block->host);
1487     }
1488     return 0;
1489 }
1490
1491 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1492 {
1493     RAMBlock *block;
1494     RAMBlock *last_block = NULL;
1495     ram_addr_t old_ram_size, new_ram_size;
1496
1497     old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1498
1499     qemu_mutex_lock_ramlist();
1500     new_block->offset = find_ram_offset(new_block->max_length);
1501
1502     if (!new_block->host) {
1503         if (xen_enabled()) {
1504             xen_ram_alloc(new_block->offset, new_block->max_length,
1505                           new_block->mr);
1506         } else {
1507             new_block->host = phys_mem_alloc(new_block->max_length,
1508                                              &new_block->mr->align);
1509             if (!new_block->host) {
1510                 error_setg_errno(errp, errno,
1511                                  "cannot set up guest memory '%s'",
1512                                  memory_region_name(new_block->mr));
1513                 qemu_mutex_unlock_ramlist();
1514                 return -1;
1515             }
1516             memory_try_enable_merging(new_block->host, new_block->max_length);
1517         }
1518     }
1519
1520     new_ram_size = MAX(old_ram_size,
1521               (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1522     if (new_ram_size > old_ram_size) {
1523         migration_bitmap_extend(old_ram_size, new_ram_size);
1524     }
1525     /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1526      * QLIST (which has an RCU-friendly variant) does not have insertion at
1527      * tail, so save the last element in last_block.
1528      */
1529     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1530         last_block = block;
1531         if (block->max_length < new_block->max_length) {
1532             break;
1533         }
1534     }
1535     if (block) {
1536         QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1537     } else if (last_block) {
1538         QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1539     } else { /* list is empty */
1540         QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1541     }
1542     ram_list.mru_block = NULL;
1543
1544     /* Write list before version */
1545     smp_wmb();
1546     ram_list.version++;
1547     qemu_mutex_unlock_ramlist();
1548
1549     new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1550
1551     if (new_ram_size > old_ram_size) {
1552         int i;
1553
1554         /* ram_list.dirty_memory[] is protected by the iothread lock.  */
1555         for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1556             ram_list.dirty_memory[i] =
1557                 bitmap_zero_extend(ram_list.dirty_memory[i],
1558                                    old_ram_size, new_ram_size);
1559        }
1560     }
1561     cpu_physical_memory_set_dirty_range(new_block->offset,
1562                                         new_block->used_length,
1563                                         DIRTY_CLIENTS_ALL);
1564
1565     if (new_block->host) {
1566         qemu_ram_setup_dump(new_block->host, new_block->max_length);
1567         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1568         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1569         if (kvm_enabled()) {
1570             kvm_setup_guest_memory(new_block->host, new_block->max_length);
1571         }
1572     }
1573
1574     return new_block->offset;
1575 }
1576
1577 #ifdef __linux__
1578 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1579                                     bool share, const char *mem_path,
1580                                     Error **errp)
1581 {
1582     RAMBlock *new_block;
1583     ram_addr_t addr;
1584     Error *local_err = NULL;
1585
1586     if (xen_enabled()) {
1587         error_setg(errp, "-mem-path not supported with Xen");
1588         return -1;
1589     }
1590
1591     if (phys_mem_alloc != qemu_anon_ram_alloc) {
1592         /*
1593          * file_ram_alloc() needs to allocate just like
1594          * phys_mem_alloc, but we haven't bothered to provide
1595          * a hook there.
1596          */
1597         error_setg(errp,
1598                    "-mem-path not supported with this accelerator");
1599         return -1;
1600     }
1601
1602     size = HOST_PAGE_ALIGN(size);
1603     new_block = g_malloc0(sizeof(*new_block));
1604     new_block->mr = mr;
1605     new_block->used_length = size;
1606     new_block->max_length = size;
1607     new_block->flags = share ? RAM_SHARED : 0;
1608     new_block->flags |= RAM_FILE;
1609     new_block->host = file_ram_alloc(new_block, size,
1610                                      mem_path, errp);
1611     if (!new_block->host) {
1612         g_free(new_block);
1613         return -1;
1614     }
1615
1616     addr = ram_block_add(new_block, &local_err);
1617     if (local_err) {
1618         g_free(new_block);
1619         error_propagate(errp, local_err);
1620         return -1;
1621     }
1622     return addr;
1623 }
1624 #endif
1625
1626 static
1627 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1628                                    void (*resized)(const char*,
1629                                                    uint64_t length,
1630                                                    void *host),
1631                                    void *host, bool resizeable,
1632                                    MemoryRegion *mr, Error **errp)
1633 {
1634     RAMBlock *new_block;
1635     ram_addr_t addr;
1636     Error *local_err = NULL;
1637
1638     size = HOST_PAGE_ALIGN(size);
1639     max_size = HOST_PAGE_ALIGN(max_size);
1640     new_block = g_malloc0(sizeof(*new_block));
1641     new_block->mr = mr;
1642     new_block->resized = resized;
1643     new_block->used_length = size;
1644     new_block->max_length = max_size;
1645     assert(max_size >= size);
1646     new_block->fd = -1;
1647     new_block->host = host;
1648     if (host) {
1649         new_block->flags |= RAM_PREALLOC;
1650     }
1651     if (resizeable) {
1652         new_block->flags |= RAM_RESIZEABLE;
1653     }
1654     addr = ram_block_add(new_block, &local_err);
1655     if (local_err) {
1656         g_free(new_block);
1657         error_propagate(errp, local_err);
1658         return -1;
1659     }
1660     return addr;
1661 }
1662
1663 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1664                                    MemoryRegion *mr, Error **errp)
1665 {
1666     return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1667 }
1668
1669 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1670 {
1671     return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1672 }
1673
1674 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1675                                      void (*resized)(const char*,
1676                                                      uint64_t length,
1677                                                      void *host),
1678                                      MemoryRegion *mr, Error **errp)
1679 {
1680     return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1681 }
1682
1683 void qemu_ram_free_from_ptr(ram_addr_t addr)
1684 {
1685     RAMBlock *block;
1686
1687     qemu_mutex_lock_ramlist();
1688     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1689         if (addr == block->offset) {
1690             QLIST_REMOVE_RCU(block, next);
1691             ram_list.mru_block = NULL;
1692             /* Write list before version */
1693             smp_wmb();
1694             ram_list.version++;
1695             g_free_rcu(block, rcu);
1696             break;
1697         }
1698     }
1699     qemu_mutex_unlock_ramlist();
1700 }
1701
1702 static void reclaim_ramblock(RAMBlock *block)
1703 {
1704     if (block->flags & RAM_PREALLOC) {
1705         ;
1706     } else if (xen_enabled()) {
1707         xen_invalidate_map_cache_entry(block->host);
1708 #ifndef _WIN32
1709     } else if (block->fd >= 0) {
1710         if (block->flags & RAM_FILE) {
1711             qemu_ram_munmap(block->host, block->max_length);
1712         } else {
1713             munmap(block->host, block->max_length);
1714         }
1715         close(block->fd);
1716 #endif
1717     } else {
1718         qemu_anon_ram_free(block->host, block->max_length);
1719     }
1720     g_free(block);
1721 }
1722
1723 void qemu_ram_free(ram_addr_t addr)
1724 {
1725     RAMBlock *block;
1726
1727     qemu_mutex_lock_ramlist();
1728     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1729         if (addr == block->offset) {
1730             QLIST_REMOVE_RCU(block, next);
1731             ram_list.mru_block = NULL;
1732             /* Write list before version */
1733             smp_wmb();
1734             ram_list.version++;
1735             call_rcu(block, reclaim_ramblock, rcu);
1736             break;
1737         }
1738     }
1739     qemu_mutex_unlock_ramlist();
1740 }
1741
1742 #ifndef _WIN32
1743 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1744 {
1745     RAMBlock *block;
1746     ram_addr_t offset;
1747     int flags;
1748     void *area, *vaddr;
1749
1750     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1751         offset = addr - block->offset;
1752         if (offset < block->max_length) {
1753             vaddr = ramblock_ptr(block, offset);
1754             if (block->flags & RAM_PREALLOC) {
1755                 ;
1756             } else if (xen_enabled()) {
1757                 abort();
1758             } else {
1759                 flags = MAP_FIXED;
1760                 if (block->fd >= 0) {
1761                     flags |= (block->flags & RAM_SHARED ?
1762                               MAP_SHARED : MAP_PRIVATE);
1763                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1764                                 flags, block->fd, offset);
1765                 } else {
1766                     /*
1767                      * Remap needs to match alloc.  Accelerators that
1768                      * set phys_mem_alloc never remap.  If they did,
1769                      * we'd need a remap hook here.
1770                      */
1771                     assert(phys_mem_alloc == qemu_anon_ram_alloc);
1772
1773                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1774                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1775                                 flags, -1, 0);
1776                 }
1777                 if (area != vaddr) {
1778                     fprintf(stderr, "Could not remap addr: "
1779                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1780                             length, addr);
1781                     exit(1);
1782                 }
1783                 memory_try_enable_merging(vaddr, length);
1784                 qemu_ram_setup_dump(vaddr, length);
1785             }
1786         }
1787     }
1788 }
1789 #endif /* !_WIN32 */
1790
1791 int qemu_get_ram_fd(ram_addr_t addr)
1792 {
1793     RAMBlock *block;
1794     int fd;
1795
1796     rcu_read_lock();
1797     block = qemu_get_ram_block(addr);
1798     fd = block->fd;
1799     rcu_read_unlock();
1800     return fd;
1801 }
1802
1803 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1804 {
1805     RAMBlock *block;
1806     void *ptr;
1807
1808     rcu_read_lock();
1809     block = qemu_get_ram_block(addr);
1810     ptr = ramblock_ptr(block, 0);
1811     rcu_read_unlock();
1812     return ptr;
1813 }
1814
1815 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1816  * This should not be used for general purpose DMA.  Use address_space_map
1817  * or address_space_rw instead. For local memory (e.g. video ram) that the
1818  * device owns, use memory_region_get_ram_ptr.
1819  *
1820  * By the time this function returns, the returned pointer is not protected
1821  * by RCU anymore.  If the caller is not within an RCU critical section and
1822  * does not hold the iothread lock, it must have other means of protecting the
1823  * pointer, such as a reference to the region that includes the incoming
1824  * ram_addr_t.
1825  */
1826 void *qemu_get_ram_ptr(ram_addr_t addr)
1827 {
1828     RAMBlock *block;
1829     void *ptr;
1830
1831     rcu_read_lock();
1832     block = qemu_get_ram_block(addr);
1833
1834     if (xen_enabled() && block->host == NULL) {
1835         /* We need to check if the requested address is in the RAM
1836          * because we don't want to map the entire memory in QEMU.
1837          * In that case just map until the end of the page.
1838          */
1839         if (block->offset == 0) {
1840             ptr = xen_map_cache(addr, 0, 0);
1841             goto unlock;
1842         }
1843
1844         block->host = xen_map_cache(block->offset, block->max_length, 1);
1845     }
1846     ptr = ramblock_ptr(block, addr - block->offset);
1847
1848 unlock:
1849     rcu_read_unlock();
1850     return ptr;
1851 }
1852
1853 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1854  * but takes a size argument.
1855  *
1856  * By the time this function returns, the returned pointer is not protected
1857  * by RCU anymore.  If the caller is not within an RCU critical section and
1858  * does not hold the iothread lock, it must have other means of protecting the
1859  * pointer, such as a reference to the region that includes the incoming
1860  * ram_addr_t.
1861  */
1862 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1863 {
1864     void *ptr;
1865     if (*size == 0) {
1866         return NULL;
1867     }
1868     if (xen_enabled()) {
1869         return xen_map_cache(addr, *size, 1);
1870     } else {
1871         RAMBlock *block;
1872         rcu_read_lock();
1873         QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1874             if (addr - block->offset < block->max_length) {
1875                 if (addr - block->offset + *size > block->max_length)
1876                     *size = block->max_length - addr + block->offset;
1877                 ptr = ramblock_ptr(block, addr - block->offset);
1878                 rcu_read_unlock();
1879                 return ptr;
1880             }
1881         }
1882
1883         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1884         abort();
1885     }
1886 }
1887
1888 /*
1889  * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1890  * in that RAMBlock.
1891  *
1892  * ptr: Host pointer to look up
1893  * round_offset: If true round the result offset down to a page boundary
1894  * *ram_addr: set to result ram_addr
1895  * *offset: set to result offset within the RAMBlock
1896  *
1897  * Returns: RAMBlock (or NULL if not found)
1898  *
1899  * By the time this function returns, the returned pointer is not protected
1900  * by RCU anymore.  If the caller is not within an RCU critical section and
1901  * does not hold the iothread lock, it must have other means of protecting the
1902  * pointer, such as a reference to the region that includes the incoming
1903  * ram_addr_t.
1904  */
1905 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1906                                    ram_addr_t *ram_addr,
1907                                    ram_addr_t *offset)
1908 {
1909     RAMBlock *block;
1910     uint8_t *host = ptr;
1911
1912     if (xen_enabled()) {
1913         rcu_read_lock();
1914         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1915         block = qemu_get_ram_block(*ram_addr);
1916         if (block) {
1917             *offset = (host - block->host);
1918         }
1919         rcu_read_unlock();
1920         return block;
1921     }
1922
1923     rcu_read_lock();
1924     block = atomic_rcu_read(&ram_list.mru_block);
1925     if (block && block->host && host - block->host < block->max_length) {
1926         goto found;
1927     }
1928
1929     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1930         /* This case append when the block is not mapped. */
1931         if (block->host == NULL) {
1932             continue;
1933         }
1934         if (host - block->host < block->max_length) {
1935             goto found;
1936         }
1937     }
1938
1939     rcu_read_unlock();
1940     return NULL;
1941
1942 found:
1943     *offset = (host - block->host);
1944     if (round_offset) {
1945         *offset &= TARGET_PAGE_MASK;
1946     }
1947     *ram_addr = block->offset + *offset;
1948     rcu_read_unlock();
1949     return block;
1950 }
1951
1952 /*
1953  * Finds the named RAMBlock
1954  *
1955  * name: The name of RAMBlock to find
1956  *
1957  * Returns: RAMBlock (or NULL if not found)
1958  */
1959 RAMBlock *qemu_ram_block_by_name(const char *name)
1960 {
1961     RAMBlock *block;
1962
1963     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1964         if (!strcmp(name, block->idstr)) {
1965             return block;
1966         }
1967     }
1968
1969     return NULL;
1970 }
1971
1972 /* Some of the softmmu routines need to translate from a host pointer
1973    (typically a TLB entry) back to a ram offset.  */
1974 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1975 {
1976     RAMBlock *block;
1977     ram_addr_t offset; /* Not used */
1978
1979     block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
1980
1981     if (!block) {
1982         return NULL;
1983     }
1984
1985     return block->mr;
1986 }
1987
1988 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1989                                uint64_t val, unsigned size)
1990 {
1991     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1992         tb_invalidate_phys_page_fast(ram_addr, size);
1993     }
1994     switch (size) {
1995     case 1:
1996         stb_p(qemu_get_ram_ptr(ram_addr), val);
1997         break;
1998     case 2:
1999         stw_p(qemu_get_ram_ptr(ram_addr), val);
2000         break;
2001     case 4:
2002         stl_p(qemu_get_ram_ptr(ram_addr), val);
2003         break;
2004     default:
2005         abort();
2006     }
2007     /* Set both VGA and migration bits for simplicity and to remove
2008      * the notdirty callback faster.
2009      */
2010     cpu_physical_memory_set_dirty_range(ram_addr, size,
2011                                         DIRTY_CLIENTS_NOCODE);
2012     /* we remove the notdirty callback only if the code has been
2013        flushed */
2014     if (!cpu_physical_memory_is_clean(ram_addr)) {
2015         tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2016     }
2017 }
2018
/*
 * .valid.accepts callback of notdirty_mem_ops: an access is accepted
 * exactly when it is a write.  opaque, addr and size are not examined.
 */
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}
2024
/*
 * Ops table for io_mem_notdirty (see io_mem_init).  Only a write handler
 * is provided; notdirty_mem_accepts rejects anything that is not a write.
 */
static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2030
/*
 * Generate a debug exception if a watchpoint has been hit.
 *
 * offset: offset of the access within its page; combined with the page
 *         part of current_cpu->mem_io_vaddr to form the checked address
 * len:    access length, passed to cpu_watchpoint_address_matches()
 * attrs:  transaction attributes, recorded in the hit watchpoint
 * flags:  access type flags, tested against each watchpoint's flags
 */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
{
    CPUState *cpu = current_cpu;
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    /* Rebuild the accessed virtual address from the page base and offset. */
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            /* Record which kind of access hit this watchpoint. */
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            wp->hitattrs = attrs;
            /* Only the first matching watchpoint triggers the exit path. */
            if (!cpu->watchpoint_hit) {
                cpu->watchpoint_hit = wp;
                tb_check_watchpoint(cpu);
                /* NOTE(review): cpu_loop_exit() and cpu_resume_from_signal()
                 * presumably do not return to this loop -- confirm. */
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(cpu, NULL);
                }
            }
        } else {
            /* Not hit by this access: clear any stale hit flags. */
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
2076
2077 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2078    so these check for a hit then pass through to the normal out-of-line
2079    phys routines.  */
2080 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2081                                   unsigned size, MemTxAttrs attrs)
2082 {
2083     MemTxResult res;
2084     uint64_t data;
2085
2086     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2087     switch (size) {
2088     case 1:
2089         data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2090         break;
2091     case 2:
2092         data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2093         break;
2094     case 4:
2095         data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2096         break;
2097     default: abort();
2098     }
2099     *pdata = data;
2100     return res;
2101 }
2102
2103 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2104                                    uint64_t val, unsigned size,
2105                                    MemTxAttrs attrs)
2106 {
2107     MemTxResult res;
2108
2109     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2110     switch (size) {
2111     case 1:
2112         address_space_stb(&address_space_memory, addr, val, attrs, &res);
2113         break;
2114     case 2:
2115         address_space_stw(&address_space_memory, addr, val, attrs, &res);
2116         break;
2117     case 4:
2118         address_space_stl(&address_space_memory, addr, val, attrs, &res);
2119         break;
2120     default: abort();
2121     }
2122     return res;
2123 }
2124
/*
 * Ops table for io_mem_watch (see io_mem_init): both directions go through
 * the attrs-aware watchpoint handlers above.
 */
static const MemoryRegionOps watch_mem_ops = {
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2130
2131 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2132                                 unsigned len, MemTxAttrs attrs)
2133 {
2134     subpage_t *subpage = opaque;
2135     uint8_t buf[8];
2136     MemTxResult res;
2137
2138 #if defined(DEBUG_SUBPAGE)
2139     printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2140            subpage, len, addr);
2141 #endif
2142     res = address_space_read(subpage->as, addr + subpage->base,
2143                              attrs, buf, len);
2144     if (res) {
2145         return res;
2146     }
2147     switch (len) {
2148     case 1:
2149         *data = ldub_p(buf);
2150         return MEMTX_OK;
2151     case 2:
2152         *data = lduw_p(buf);
2153         return MEMTX_OK;
2154     case 4:
2155         *data = ldl_p(buf);
2156         return MEMTX_OK;
2157     case 8:
2158         *data = ldq_p(buf);
2159         return MEMTX_OK;
2160     default:
2161         abort();
2162     }
2163 }
2164
2165 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2166                                  uint64_t value, unsigned len, MemTxAttrs attrs)
2167 {
2168     subpage_t *subpage = opaque;
2169     uint8_t buf[8];
2170
2171 #if defined(DEBUG_SUBPAGE)
2172     printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2173            " value %"PRIx64"\n",
2174            __func__, subpage, len, addr, value);
2175 #endif
2176     switch (len) {
2177     case 1:
2178         stb_p(buf, value);
2179         break;
2180     case 2:
2181         stw_p(buf, value);
2182         break;
2183     case 4:
2184         stl_p(buf, value);
2185         break;
2186     case 8:
2187         stq_p(buf, value);
2188         break;
2189     default:
2190         abort();
2191     }
2192     return address_space_write(subpage->as, addr + subpage->base,
2193                                attrs, buf, len);
2194 }
2195
2196 static bool subpage_accepts(void *opaque, hwaddr addr,
2197                             unsigned len, bool is_write)
2198 {
2199     subpage_t *subpage = opaque;
2200 #if defined(DEBUG_SUBPAGE)
2201     printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2202            __func__, subpage, is_write ? 'w' : 'r', len, addr);
2203 #endif
2204
2205     return address_space_access_valid(subpage->as, addr + subpage->base,
2206                                       len, is_write);
2207 }
2208
/*
 * Ops table used for subpage regions (see subpage_init).  Accesses of 1 to
 * 8 bytes are declared both valid and implemented; validity is further
 * checked by subpage_accepts.
 */
static const MemoryRegionOps subpage_ops = {
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2219
2220 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2221                              uint16_t section)
2222 {
2223     int idx, eidx;
2224
2225     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2226         return -1;
2227     idx = SUBPAGE_IDX(start);
2228     eidx = SUBPAGE_IDX(end);
2229 #if defined(DEBUG_SUBPAGE)
2230     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2231            __func__, mmio, start, end, idx, eidx, section);
2232 #endif
2233     for (; idx <= eidx; idx++) {
2234         mmio->sub_section[idx] = section;
2235     }
2236
2237     return 0;
2238 }
2239
2240 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2241 {
2242     subpage_t *mmio;
2243
2244     mmio = g_malloc0(sizeof(subpage_t));
2245
2246     mmio->as = as;
2247     mmio->base = base;
2248     memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2249                           NULL, TARGET_PAGE_SIZE);
2250     mmio->iomem.subpage = true;
2251 #if defined(DEBUG_SUBPAGE)
2252     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2253            mmio, base, TARGET_PAGE_SIZE);
2254 #endif
2255     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2256
2257     return mmio;
2258 }
2259
2260 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2261                               MemoryRegion *mr)
2262 {
2263     assert(as);
2264     MemoryRegionSection section = {
2265         .address_space = as,
2266         .mr = mr,
2267         .offset_within_address_space = 0,
2268         .offset_within_region = 0,
2269         .size = int128_2_64(),
2270     };
2271
2272     return phys_section_add(map, &section);
2273 }
2274
2275 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2276 {
2277     CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2278     AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2279     MemoryRegionSection *sections = d->map.sections;
2280
2281     return sections[index & ~TARGET_PAGE_MASK].mr;
2282 }
2283
2284 static void io_mem_init(void)
2285 {
2286     memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2287     memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2288                           NULL, UINT64_MAX);
2289     memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2290                           NULL, UINT64_MAX);
2291     memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2292                           NULL, UINT64_MAX);
2293 }
2294
2295 static void mem_begin(MemoryListener *listener)
2296 {
2297     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2298     AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2299     uint16_t n;
2300
2301     n = dummy_section(&d->map, as, &io_mem_unassigned);
2302     assert(n == PHYS_SECTION_UNASSIGNED);
2303     n = dummy_section(&d->map, as, &io_mem_notdirty);
2304     assert(n == PHYS_SECTION_NOTDIRTY);
2305     n = dummy_section(&d->map, as, &io_mem_rom);
2306     assert(n == PHYS_SECTION_ROM);
2307     n = dummy_section(&d->map, as, &io_mem_watch);
2308     assert(n == PHYS_SECTION_WATCH);
2309
2310     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2311     d->as = as;
2312     as->next_dispatch = d;
2313 }
2314
/*
 * Release an AddressSpaceDispatch: first the contents of its section map,
 * then the structure itself.  Used as the call_rcu() callback when a
 * dispatch is replaced or destroyed.
 */
static void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d);
}
2320
2321 static void mem_commit(MemoryListener *listener)
2322 {
2323     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2324     AddressSpaceDispatch *cur = as->dispatch;
2325     AddressSpaceDispatch *next = as->next_dispatch;
2326
2327     phys_page_compact_all(next, next->map.nodes_nb);
2328
2329     atomic_rcu_set(&as->dispatch, next);
2330     if (cur) {
2331         call_rcu(cur, address_space_dispatch_free, rcu);
2332     }
2333 }
2334
/*
 * Commit callback of a CPUAddressSpace's tcg_as_listener: refresh the
 * cached memory_dispatch pointer from the address space and flush the
 * CPU's TLB.
 */
static void tcg_commit(MemoryListener *listener)
{
    CPUAddressSpace *cpuas;
    AddressSpaceDispatch *d;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
    cpu_reloading_memory_map();
    /* The CPU and TLB are protected by the iothread lock.
     * We reload the dispatch pointer now because cpu_reloading_memory_map()
     * may have split the RCU critical section.
     */
    d = atomic_rcu_read(&cpuas->as->dispatch);
    cpuas->memory_dispatch = d;
    tlb_flush(cpuas->cpu, 1);
}
2352
2353 void address_space_init_dispatch(AddressSpace *as)
2354 {
2355     as->dispatch = NULL;
2356     as->dispatch_listener = (MemoryListener) {
2357         .begin = mem_begin,
2358         .commit = mem_commit,
2359         .region_add = mem_add,
2360         .region_nop = mem_add,
2361         .priority = 0,
2362     };
2363     memory_listener_register(&as->dispatch_listener, as);
2364 }
2365
/* Unregister the dispatch listener that address_space_init_dispatch()
 * registered for this address space. */
void address_space_unregister(AddressSpace *as)
{
    memory_listener_unregister(&as->dispatch_listener);
}
2370
2371 void address_space_destroy_dispatch(AddressSpace *as)
2372 {
2373     AddressSpaceDispatch *d = as->dispatch;
2374
2375     atomic_rcu_set(&as->dispatch, NULL);
2376     if (d) {
2377         call_rcu(d, address_space_dispatch_free, rcu);
2378     }
2379 }
2380
2381 static void memory_map_init(void)
2382 {
2383     system_memory = g_malloc(sizeof(*system_memory));
2384
2385     memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2386     address_space_init(&address_space_memory, system_memory, "memory");
2387
2388     system_io = g_malloc(sizeof(*system_io));
2389     memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2390                           65536);
2391     address_space_init(&address_space_io, system_io, "I/O");
2392 }
2393
/* Return the root "system" memory region created by memory_map_init(). */
MemoryRegion *get_system_memory(void)
{
    return system_memory;
}
2398
/* Return the root "io" memory region created by memory_map_init(). */
MemoryRegion *get_system_io(void)
{
    return system_io;
}
2403
2404 #endif /* !defined(CONFIG_USER_ONLY) */
2405
2406 /* physical memory access (slow version, mainly for debug) */
2407 #if defined(CONFIG_USER_ONLY)
2408 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2409                         uint8_t *buf, int len, int is_write)
2410 {
2411     int l, flags;
2412     target_ulong page;
2413     void * p;
2414
2415     while (len > 0) {
2416         page = addr & TARGET_PAGE_MASK;
2417         l = (page + TARGET_PAGE_SIZE) - addr;
2418         if (l > len)
2419             l = len;
2420         flags = page_get_flags(page);
2421         if (!(flags & PAGE_VALID))
2422             return -1;
2423         if (is_write) {
2424             if (!(flags & PAGE_WRITE))
2425                 return -1;
2426             /* XXX: this code should not depend on lock_user */
2427             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2428                 return -1;
2429             memcpy(p, buf, l);
2430             unlock_user(p, addr, l);
2431         } else {
2432             if (!(flags & PAGE_READ))
2433                 return -1;
2434             /* XXX: this code should not depend on lock_user */
2435             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2436                 return -1;
2437             memcpy(buf, p, l);
2438             unlock_user(p, addr, 0);
2439         }
2440         len -= l;
2441         buf += l;
2442         addr += l;
2443     }
2444     return 0;
2445 }
2446
2447 #else
2448
2449 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2450                                      hwaddr length)
2451 {
2452     uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2453     /* No early return if dirty_log_mask is or becomes 0, because
2454      * cpu_physical_memory_set_dirty_range will still call
2455      * xen_modified_memory.
2456      */
2457     if (dirty_log_mask) {
2458         dirty_log_mask =
2459             cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2460     }
2461     if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2462         tb_invalidate_phys_range(addr, addr + length);
2463         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2464     }
2465     cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2466 }
2467
2468 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2469 {
2470     unsigned access_size_max = mr->ops->valid.max_access_size;
2471
2472     /* Regions are assumed to support 1-4 byte accesses unless
2473        otherwise specified.  */
2474     if (access_size_max == 0) {
2475         access_size_max = 4;
2476     }
2477
2478     /* Bound the maximum access by the alignment of the address.  */
2479     if (!mr->ops->impl.unaligned) {
2480         unsigned align_size_max = addr & -addr;
2481         if (align_size_max != 0 && align_size_max < access_size_max) {
2482             access_size_max = align_size_max;
2483         }
2484     }
2485
2486     /* Don't attempt accesses larger than the maximum.  */
2487     if (l > access_size_max) {
2488         l = access_size_max;
2489     }
2490     l = pow2floor(l);
2491
2492     return l;
2493 }
2494
2495 static bool prepare_mmio_access(MemoryRegion *mr)
2496 {
2497     bool unlocked = !qemu_mutex_iothread_locked();
2498     bool release_lock = false;
2499
2500     if (unlocked && mr->global_locking) {
2501         qemu_mutex_lock_iothread();
2502         unlocked = false;
2503         release_lock = true;
2504     }
2505     if (mr->flush_coalesced_mmio) {
2506         if (unlocked) {
2507             qemu_mutex_lock_iothread();
2508         }
2509         qemu_flush_coalesced_mmio_buffer();
2510         if (unlocked) {
2511             qemu_mutex_unlock_iothread();
2512         }
2513     }
2514
2515     return release_lock;
2516 }
2517
/*
 * Read from or write to an address space.
 *
 * as:       address space to access
 * addr:     start address within as
 * attrs:    transaction attributes passed to the MMIO dispatch functions
 * buf:      source (write) or destination (read) buffer
 * len:      number of bytes to transfer
 * is_write: true to write buf into the address space, false to read
 *
 * The range is processed in pieces; each piece is translated separately
 * and is either copied directly to/from RAM or dispatched as a single
 * 1/2/4/8-byte MMIO access.
 *
 * Returns the OR of the results of all MMIO dispatches (MEMTX_OK if there
 * were none or all succeeded).
 */
MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             uint8_t *buf, int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    rcu_read_lock();
    while (len > 0) {
        /* Translate the next piece; l is updated to the translated length. */
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                release_lock |= prepare_mmio_access(mr);
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force current_cpu to NULL to avoid
                   potential bugs */
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
                    result |= memory_region_dispatch_write(mr, addr1, val, 8,
                                                           attrs);
                    break;
                case 4:
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    result |= memory_region_dispatch_write(mr, addr1, val, 4,
                                                           attrs);
                    break;
                case 2:
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    result |= memory_region_dispatch_write(mr, addr1, val, 2,
                                                           attrs);
                    break;
                case 1:
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    result |= memory_region_dispatch_write(mr, addr1, val, 1,
                                                           attrs);
                    break;
                default:
                    abort();
                }
            } else {
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(mr, addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                release_lock |= prepare_mmio_access(mr);
                l = memory_access_size(mr, l, addr1);
                switch (l) {
                case 8:
                    /* 64 bit read access */
                    result |= memory_region_dispatch_read(mr, addr1, &val, 8,
                                                          attrs);
                    stq_p(buf, val);
                    break;
                case 4:
                    /* 32 bit read access */
                    result |= memory_region_dispatch_read(mr, addr1, &val, 4,
                                                          attrs);
                    stl_p(buf, val);
                    break;
                case 2:
                    /* 16 bit read access */
                    result |= memory_region_dispatch_read(mr, addr1, &val, 2,
                                                          attrs);
                    stw_p(buf, val);
                    break;
                case 1:
                    /* 8 bit read access */
                    result |= memory_region_dispatch_read(mr, addr1, &val, 1,
                                                          attrs);
                    stb_p(buf, val);
                    break;
                default:
                    abort();
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }

        /* Drop the iothread lock if prepare_mmio_access() took it. */
        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;
    }
    rcu_read_unlock();

    return result;
}
2628
/*
 * Write len bytes from buf to addr in as.  Thin wrapper around
 * address_space_rw() with is_write set; buf is not modified through the
 * cast on the write path.
 */
MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                                const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
}
2634
/*
 * Read len bytes from addr in as into buf.  Thin wrapper around
 * address_space_rw() with is_write cleared.
 */
MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                               uint8_t *buf, int len)
{
    return address_space_rw(as, addr, attrs, buf, len, false);
}
2640
2641
/*
 * Read or write address_space_memory with unspecified transaction
 * attributes.  The MemTxResult of address_space_rw() is discarded.
 */
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}
2648
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM and mark it dirty */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2653
2654 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2655     hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2656 {
2657     hwaddr l;
2658     uint8_t *ptr;
2659     hwaddr addr1;
2660     MemoryRegion *mr;
2661
2662     rcu_read_lock();
2663     while (len > 0) {
2664         l = len;
2665         mr = address_space_translate(as, addr, &addr1, &l, true);
2666
2667         if (!(memory_region_is_ram(mr) ||
2668               memory_region_is_romd(mr))) {
2669             l = memory_access_size(mr, l, addr1);
2670         } else {
2671             addr1 += memory_region_get_ram_addr(mr);
2672             /* ROM/RAM case */
2673             ptr = qemu_get_ram_ptr(addr1);
2674             switch (type) {
2675             case WRITE_DATA:
2676                 memcpy(ptr, buf, l);
2677                 invalidate_and_set_dirty(mr, addr1, l);
2678                 break;
2679             case FLUSH_CACHE:
2680                 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2681                 break;
2682             }
2683         }
2684         len -= l;
2685         buf += l;
2686         addr += l;
2687     }
2688     rcu_read_unlock();
2689 }
2690
/* used for ROM loading : can write in RAM and ROM
 *
 * Copies len bytes from buf to addr in as via
 * cpu_physical_memory_write_rom_internal() with WRITE_DATA.
 */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}
2697
2698 void cpu_flush_icache_range(hwaddr start, int len)
2699 {
2700     /*
2701      * This function should do the same thing as an icache flush that was
2702      * triggered from within the guest. For TCG we are always cache coherent,
2703      * so there is no need to flush anything. For KVM / Xen we need to flush
2704      * the host's instruction cache at least.
2705      */
2706     if (tcg_enabled()) {
2707         return;
2708     }
2709
2710     cpu_physical_memory_write_rom_internal(&address_space_memory,
2711                                            start, NULL, len, FLUSH_CACHE);
2712 }
2713
/*
 * State of the single bounce buffer that address_space_map() uses for
 * memory that cannot be accessed directly.
 */
typedef struct {
    MemoryRegion *mr;   /* region referenced while the buffer is in use */
    void *buffer;       /* buffer allocated by address_space_map() */
    hwaddr addr;        /* address that was mapped */
    hwaddr len;         /* length of the mapping */
    bool in_use;        /* claimed with atomic_xchg() by address_space_map() */
} BounceBuffer;

/* The one and only bounce buffer. */
static BounceBuffer bounce;
2723
/* A client registered through cpu_register_map_client(). */
typedef struct MapClient {
    QEMUBH *bh;                     /* bottom half scheduled on notification */
    QLIST_ENTRY(MapClient) link;    /* linkage in map_client_list */
} MapClient;

/* Held around every access to map_client_list in this file;
 * initialised in cpu_exec_init_all(). */
QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
2732
/*
 * Unlink client from map_client_list and free it.  The callers in this
 * file invoke it with map_client_list_lock held.
 */
static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}
2738
2739 static void cpu_notify_map_clients_locked(void)
2740 {
2741     MapClient *client;
2742
2743     while (!QLIST_EMPTY(&map_client_list)) {
2744         client = QLIST_FIRST(&map_client_list);
2745         qemu_bh_schedule(client->bh);
2746         cpu_unregister_map_client_do(client);
2747     }
2748 }
2749
2750 void cpu_register_map_client(QEMUBH *bh)
2751 {
2752     MapClient *client = g_malloc(sizeof(*client));
2753
2754     qemu_mutex_lock(&map_client_list_lock);
2755     client->bh = bh;
2756     QLIST_INSERT_HEAD(&map_client_list, client, link);
2757     if (!atomic_read(&bounce.in_use)) {
2758         cpu_notify_map_clients_locked();
2759     }
2760     qemu_mutex_unlock(&map_client_list_lock);
2761 }
2762
/*
 * One-time initialisation for this file: the RAM list mutex, the special
 * I/O memory regions, the system memory and I/O address spaces, and the
 * map client list lock.
 */
void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    io_mem_init();
    memory_map_init();
    qemu_mutex_init(&map_client_list_lock);
}
2770
2771 void cpu_unregister_map_client(QEMUBH *bh)
2772 {
2773     MapClient *client;
2774
2775     qemu_mutex_lock(&map_client_list_lock);
2776     QLIST_FOREACH(client, &map_client_list, link) {
2777         if (client->bh == bh) {
2778             cpu_unregister_map_client_do(client);
2779             break;
2780         }
2781     }
2782     qemu_mutex_unlock(&map_client_list_lock);
2783 }
2784
/* Notify all registered map clients, taking map_client_list_lock around
 * cpu_notify_map_clients_locked(). */
static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}
2791
2792 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2793 {
2794     MemoryRegion *mr;
2795     hwaddr l, xlat;
2796
2797     rcu_read_lock();
2798     while (len > 0) {
2799         l = len;
2800         mr = address_space_translate(as, addr, &xlat, &l, is_write);
2801         if (!memory_access_is_direct(mr, is_write)) {
2802             l = memory_access_size(mr, l, addr);
2803             if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2804                 return false;
2805             }
2806         }
2807
2808         len -= l;
2809         addr += l;
2810     }
2811     rcu_read_unlock();
2812     return true;
2813 }
2814
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 *
 * as:       address space to map from
 * addr:     start address within as
 * plen:     in: requested length; out: length actually mapped
 * is_write: true if the mapping will be written, false if it will be read
 *
 * Also returns NULL when *plen is 0.  A reference is taken on the mapped
 * memory region before a non-NULL pointer is returned.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &l, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        /* Not directly accessible: fall back to the single bounce buffer,
         * which can only be claimed by one mapping at a time. */
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        /* For a read mapping, fill the buffer with the current contents. */
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        rcu_read_unlock();
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    /* Extend the mapping for as long as the following pieces translate to
     * the same region at contiguous offsets. */
    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    rcu_read_unlock();
    *plen = done;
    /* qemu_ram_ptr_length() may shrink *plen further. */
    return qemu_ram_ptr_length(raddr + base, plen);
}
2887
2888 /* Unmaps a memory region previously mapped by address_space_map().
2889  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2890  * the amount of memory that was actually read or written by the caller.
2891  */
2892 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2893                          int is_write, hwaddr access_len)
2894 {
2895     if (buffer != bounce.buffer) {
2896         MemoryRegion *mr;
2897         ram_addr_t addr1;
2898
2899         mr = qemu_ram_addr_from_host(buffer, &addr1);
2900         assert(mr != NULL);
2901         if (is_write) {
2902             invalidate_and_set_dirty(mr, addr1, access_len);
2903         }
2904         if (xen_enabled()) {
2905             xen_invalidate_map_cache_entry(buffer);
2906         }
2907         memory_region_unref(mr);
2908         return;
2909     }
2910     if (is_write) {
2911         address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2912                             bounce.buffer, access_len);
2913     }
2914     qemu_vfree(bounce.buffer);
2915     bounce.buffer = NULL;
2916     memory_region_unref(bounce.mr);
2917     atomic_mb_set(&bounce.in_use, false);
2918     cpu_notify_map_clients();
2919 }
2920
2921 void *cpu_physical_memory_map(hwaddr addr,
2922                               hwaddr *plen,
2923                               int is_write)
2924 {
2925     return address_space_map(&address_space_memory, addr, plen, is_write);
2926 }
2927
2928 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2929                                int is_write, hwaddr access_len)
2930 {
2931     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2932 }
2933
2934 /* warning: addr must be aligned */
2935 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2936                                                   MemTxAttrs attrs,
2937                                                   MemTxResult *result,
2938                                                   enum device_endian endian)
2939 {
2940     uint8_t *ptr;
2941     uint64_t val;
2942     MemoryRegion *mr;
2943     hwaddr l = 4;
2944     hwaddr addr1;
2945     MemTxResult r;
2946     bool release_lock = false;
2947
2948     rcu_read_lock();
2949     mr = address_space_translate(as, addr, &addr1, &l, false);
2950     if (l < 4 || !memory_access_is_direct(mr, false)) {
2951         release_lock |= prepare_mmio_access(mr);
2952
2953         /* I/O case */
2954         r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2955 #if defined(TARGET_WORDS_BIGENDIAN)
2956         if (endian == DEVICE_LITTLE_ENDIAN) {
2957             val = bswap32(val);
2958         }
2959 #else
2960         if (endian == DEVICE_BIG_ENDIAN) {
2961             val = bswap32(val);
2962         }
2963 #endif
2964     } else {
2965         /* RAM case */
2966         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2967                                 & TARGET_PAGE_MASK)
2968                                + addr1);
2969         switch (endian) {
2970         case DEVICE_LITTLE_ENDIAN:
2971             val = ldl_le_p(ptr);
2972             break;
2973         case DEVICE_BIG_ENDIAN:
2974             val = ldl_be_p(ptr);
2975             break;
2976         default:
2977             val = ldl_p(ptr);
2978             break;
2979         }
2980         r = MEMTX_OK;
2981     }
2982     if (result) {
2983         *result = r;
2984     }
2985     if (release_lock) {
2986         qemu_mutex_unlock_iothread();
2987     }
2988     rcu_read_unlock();
2989     return val;
2990 }
2991
2992 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2993                            MemTxAttrs attrs, MemTxResult *result)
2994 {
2995     return address_space_ldl_internal(as, addr, attrs, result,
2996                                       DEVICE_NATIVE_ENDIAN);
2997 }
2998
2999 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3000                               MemTxAttrs attrs, MemTxResult *result)
3001 {
3002     return address_space_ldl_internal(as, addr, attrs, result,
3003                                       DEVICE_LITTLE_ENDIAN);
3004 }
3005
3006 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3007                               MemTxAttrs attrs, MemTxResult *result)
3008 {
3009     return address_space_ldl_internal(as, addr, attrs, result,
3010                                       DEVICE_BIG_ENDIAN);
3011 }
3012
3013 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3014 {
3015     return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3016 }
3017
3018 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3019 {
3020     return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3021 }
3022
3023 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3024 {
3025     return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3026 }
3027
3028 /* warning: addr must be aligned */
3029 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3030                                                   MemTxAttrs attrs,
3031                                                   MemTxResult *result,
3032                                                   enum device_endian endian)
3033 {
3034     uint8_t *ptr;
3035     uint64_t val;
3036     MemoryRegion *mr;
3037     hwaddr l = 8;
3038     hwaddr addr1;
3039     MemTxResult r;
3040     bool release_lock = false;
3041
3042     rcu_read_lock();
3043     mr = address_space_translate(as, addr, &addr1, &l,
3044                                  false);
3045     if (l < 8 || !memory_access_is_direct(mr, false)) {
3046         release_lock |= prepare_mmio_access(mr);
3047
3048         /* I/O case */
3049         r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3050 #if defined(TARGET_WORDS_BIGENDIAN)
3051         if (endian == DEVICE_LITTLE_ENDIAN) {
3052             val = bswap64(val);
3053         }
3054 #else
3055         if (endian == DEVICE_BIG_ENDIAN) {
3056             val = bswap64(val);
3057         }
3058 #endif
3059     } else {
3060         /* RAM case */
3061         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3062                                 & TARGET_PAGE_MASK)
3063                                + addr1);
3064         switch (endian) {
3065         case DEVICE_LITTLE_ENDIAN:
3066             val = ldq_le_p(ptr);
3067             break;
3068         case DEVICE_BIG_ENDIAN:
3069             val = ldq_be_p(ptr);
3070             break;
3071         default:
3072             val = ldq_p(ptr);
3073             break;
3074         }
3075         r = MEMTX_OK;
3076     }
3077     if (result) {
3078         *result = r;
3079     }
3080     if (release_lock) {
3081         qemu_mutex_unlock_iothread();
3082     }
3083     rcu_read_unlock();
3084     return val;
3085 }
3086
3087 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3088                            MemTxAttrs attrs, MemTxResult *result)
3089 {
3090     return address_space_ldq_internal(as, addr, attrs, result,
3091                                       DEVICE_NATIVE_ENDIAN);
3092 }
3093
3094 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3095                            MemTxAttrs attrs, MemTxResult *result)
3096 {
3097     return address_space_ldq_internal(as, addr, attrs, result,
3098                                       DEVICE_LITTLE_ENDIAN);
3099 }
3100
3101 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3102                            MemTxAttrs attrs, MemTxResult *result)
3103 {
3104     return address_space_ldq_internal(as, addr, attrs, result,
3105                                       DEVICE_BIG_ENDIAN);
3106 }
3107
3108 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3109 {
3110     return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3111 }
3112
3113 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3114 {
3115     return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3116 }
3117
3118 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3119 {
3120     return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3121 }
3122
3123 /* XXX: optimize */
3124 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3125                             MemTxAttrs attrs, MemTxResult *result)
3126 {
3127     uint8_t val;
3128     MemTxResult r;
3129
3130     r = address_space_rw(as, addr, attrs, &val, 1, 0);
3131     if (result) {
3132         *result = r;
3133     }
3134     return val;
3135 }
3136
3137 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3138 {
3139     return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3140 }
3141
3142 /* warning: addr must be aligned */
3143 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3144                                                    hwaddr addr,
3145                                                    MemTxAttrs attrs,
3146                                                    MemTxResult *result,
3147                                                    enum device_endian endian)
3148 {
3149     uint8_t *ptr;
3150     uint64_t val;
3151     MemoryRegion *mr;
3152     hwaddr l = 2;
3153     hwaddr addr1;
3154     MemTxResult r;
3155     bool release_lock = false;
3156
3157     rcu_read_lock();
3158     mr = address_space_translate(as, addr, &addr1, &l,
3159                                  false);
3160     if (l < 2 || !memory_access_is_direct(mr, false)) {
3161         release_lock |= prepare_mmio_access(mr);
3162
3163         /* I/O case */
3164         r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3165 #if defined(TARGET_WORDS_BIGENDIAN)
3166         if (endian == DEVICE_LITTLE_ENDIAN) {
3167             val = bswap16(val);
3168         }
3169 #else
3170         if (endian == DEVICE_BIG_ENDIAN) {
3171             val = bswap16(val);
3172         }
3173 #endif
3174     } else {
3175         /* RAM case */
3176         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3177                                 & TARGET_PAGE_MASK)
3178                                + addr1);
3179         switch (endian) {
3180         case DEVICE_LITTLE_ENDIAN:
3181             val = lduw_le_p(ptr);
3182             break;
3183         case DEVICE_BIG_ENDIAN:
3184             val = lduw_be_p(ptr);
3185             break;
3186         default:
3187             val = lduw_p(ptr);
3188             break;
3189         }
3190         r = MEMTX_OK;
3191     }
3192     if (result) {
3193         *result = r;
3194     }
3195     if (release_lock) {
3196         qemu_mutex_unlock_iothread();
3197     }
3198     rcu_read_unlock();
3199     return val;
3200 }
3201
3202 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3203                            MemTxAttrs attrs, MemTxResult *result)
3204 {
3205     return address_space_lduw_internal(as, addr, attrs, result,
3206                                        DEVICE_NATIVE_ENDIAN);
3207 }
3208
3209 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3210                            MemTxAttrs attrs, MemTxResult *result)
3211 {
3212     return address_space_lduw_internal(as, addr, attrs, result,
3213                                        DEVICE_LITTLE_ENDIAN);
3214 }
3215
3216 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3217                            MemTxAttrs attrs, MemTxResult *result)
3218 {
3219     return address_space_lduw_internal(as, addr, attrs, result,
3220                                        DEVICE_BIG_ENDIAN);
3221 }
3222
3223 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3224 {
3225     return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3226 }
3227
3228 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3229 {
3230     return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3231 }
3232
3233 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3234 {
3235     return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3236 }
3237
3238 /* warning: addr must be aligned. The ram page is not masked as dirty
3239    and the code inside is not invalidated. It is useful if the dirty
3240    bits are used to track modified PTEs */
3241 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3242                                 MemTxAttrs attrs, MemTxResult *result)
3243 {
3244     uint8_t *ptr;
3245     MemoryRegion *mr;
3246     hwaddr l = 4;
3247     hwaddr addr1;
3248     MemTxResult r;
3249     uint8_t dirty_log_mask;
3250     bool release_lock = false;
3251
3252     rcu_read_lock();
3253     mr = address_space_translate(as, addr, &addr1, &l,
3254                                  true);
3255     if (l < 4 || !memory_access_is_direct(mr, true)) {
3256         release_lock |= prepare_mmio_access(mr);
3257
3258         r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3259     } else {
3260         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3261         ptr = qemu_get_ram_ptr(addr1);
3262         stl_p(ptr, val);
3263
3264         dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3265         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3266         cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3267         r = MEMTX_OK;
3268     }
3269     if (result) {
3270         *result = r;
3271     }
3272     if (release_lock) {
3273         qemu_mutex_unlock_iothread();
3274     }
3275     rcu_read_unlock();
3276 }
3277
3278 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3279 {
3280     address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3281 }
3282
3283 /* warning: addr must be aligned */
3284 static inline void address_space_stl_internal(AddressSpace *as,
3285                                               hwaddr addr, uint32_t val,
3286                                               MemTxAttrs attrs,
3287                                               MemTxResult *result,
3288                                               enum device_endian endian)
3289 {
3290     uint8_t *ptr;
3291     MemoryRegion *mr;
3292     hwaddr l = 4;
3293     hwaddr addr1;
3294     MemTxResult r;
3295     bool release_lock = false;
3296
3297     rcu_read_lock();
3298     mr = address_space_translate(as, addr, &addr1, &l,
3299                                  true);
3300     if (l < 4 || !memory_access_is_direct(mr, true)) {
3301         release_lock |= prepare_mmio_access(mr);
3302
3303 #if defined(TARGET_WORDS_BIGENDIAN)
3304         if (endian == DEVICE_LITTLE_ENDIAN) {
3305             val = bswap32(val);
3306         }
3307 #else
3308         if (endian == DEVICE_BIG_ENDIAN) {
3309             val = bswap32(val);
3310         }
3311 #endif
3312         r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3313     } else {
3314         /* RAM case */
3315         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3316         ptr = qemu_get_ram_ptr(addr1);
3317         switch (endian) {
3318         case DEVICE_LITTLE_ENDIAN:
3319             stl_le_p(ptr, val);
3320             break;
3321         case DEVICE_BIG_ENDIAN:
3322             stl_be_p(ptr, val);
3323             break;
3324         default:
3325             stl_p(ptr, val);
3326             break;
3327         }
3328         invalidate_and_set_dirty(mr, addr1, 4);
3329         r = MEMTX_OK;
3330     }
3331     if (result) {
3332         *result = r;
3333     }
3334     if (release_lock) {
3335         qemu_mutex_unlock_iothread();
3336     }
3337     rcu_read_unlock();
3338 }
3339
3340 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3341                        MemTxAttrs attrs, MemTxResult *result)
3342 {
3343     address_space_stl_internal(as, addr, val, attrs, result,
3344                                DEVICE_NATIVE_ENDIAN);
3345 }
3346
3347 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3348                        MemTxAttrs attrs, MemTxResult *result)
3349 {
3350     address_space_stl_internal(as, addr, val, attrs, result,
3351                                DEVICE_LITTLE_ENDIAN);
3352 }
3353
3354 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3355                        MemTxAttrs attrs, MemTxResult *result)
3356 {
3357     address_space_stl_internal(as, addr, val, attrs, result,
3358                                DEVICE_BIG_ENDIAN);
3359 }
3360
3361 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3362 {
3363     address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3364 }
3365
3366 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3367 {
3368     address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3369 }
3370
3371 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3372 {
3373     address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3374 }
3375
3376 /* XXX: optimize */
3377 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3378                        MemTxAttrs attrs, MemTxResult *result)
3379 {
3380     uint8_t v = val;
3381     MemTxResult r;
3382
3383     r = address_space_rw(as, addr, attrs, &v, 1, 1);
3384     if (result) {
3385         *result = r;
3386     }
3387 }
3388
3389 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3390 {
3391     address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3392 }
3393
3394 /* warning: addr must be aligned */
3395 static inline void address_space_stw_internal(AddressSpace *as,
3396                                               hwaddr addr, uint32_t val,
3397                                               MemTxAttrs attrs,
3398                                               MemTxResult *result,
3399                                               enum device_endian endian)
3400 {
3401     uint8_t *ptr;
3402     MemoryRegion *mr;
3403     hwaddr l = 2;
3404     hwaddr addr1;
3405     MemTxResult r;
3406     bool release_lock = false;
3407
3408     rcu_read_lock();
3409     mr = address_space_translate(as, addr, &addr1, &l, true);
3410     if (l < 2 || !memory_access_is_direct(mr, true)) {
3411         release_lock |= prepare_mmio_access(mr);
3412
3413 #if defined(TARGET_WORDS_BIGENDIAN)
3414         if (endian == DEVICE_LITTLE_ENDIAN) {
3415             val = bswap16(val);
3416         }
3417 #else
3418         if (endian == DEVICE_BIG_ENDIAN) {
3419             val = bswap16(val);
3420         }
3421 #endif
3422         r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3423     } else {
3424         /* RAM case */
3425         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3426         ptr = qemu_get_ram_ptr(addr1);
3427         switch (endian) {
3428         case DEVICE_LITTLE_ENDIAN:
3429             stw_le_p(ptr, val);
3430             break;
3431         case DEVICE_BIG_ENDIAN:
3432             stw_be_p(ptr, val);
3433             break;
3434         default:
3435             stw_p(ptr, val);
3436             break;
3437         }
3438         invalidate_and_set_dirty(mr, addr1, 2);
3439         r = MEMTX_OK;
3440     }
3441     if (result) {
3442         *result = r;
3443     }
3444     if (release_lock) {
3445         qemu_mutex_unlock_iothread();
3446     }
3447     rcu_read_unlock();
3448 }
3449
3450 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3451                        MemTxAttrs attrs, MemTxResult *result)
3452 {
3453     address_space_stw_internal(as, addr, val, attrs, result,
3454                                DEVICE_NATIVE_ENDIAN);
3455 }
3456
3457 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3458                        MemTxAttrs attrs, MemTxResult *result)
3459 {
3460     address_space_stw_internal(as, addr, val, attrs, result,
3461                                DEVICE_LITTLE_ENDIAN);
3462 }
3463
3464 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3465                        MemTxAttrs attrs, MemTxResult *result)
3466 {
3467     address_space_stw_internal(as, addr, val, attrs, result,
3468                                DEVICE_BIG_ENDIAN);
3469 }
3470
3471 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3472 {
3473     address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3474 }
3475
3476 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3477 {
3478     address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3479 }
3480
3481 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3482 {
3483     address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3484 }
3485
3486 /* XXX: optimize */
3487 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3488                        MemTxAttrs attrs, MemTxResult *result)
3489 {
3490     MemTxResult r;
3491     val = tswap64(val);
3492     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3493     if (result) {
3494         *result = r;
3495     }
3496 }
3497
3498 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3499                        MemTxAttrs attrs, MemTxResult *result)
3500 {
3501     MemTxResult r;
3502     val = cpu_to_le64(val);
3503     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3504     if (result) {
3505         *result = r;
3506     }
3507 }
3508 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3509                        MemTxAttrs attrs, MemTxResult *result)
3510 {
3511     MemTxResult r;
3512     val = cpu_to_be64(val);
3513     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3514     if (result) {
3515         *result = r;
3516     }
3517 }
3518
3519 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3520 {
3521     address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3522 }
3523
3524 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3525 {
3526     address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3527 }
3528
3529 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3530 {
3531     address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3532 }
3533
3534 /* virtual memory access for debug (includes writing to ROM) */
3535 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3536                         uint8_t *buf, int len, int is_write)
3537 {
3538     int l;
3539     hwaddr phys_addr;
3540     target_ulong page;
3541
3542     while (len > 0) {
3543         page = addr & TARGET_PAGE_MASK;
3544         phys_addr = cpu_get_phys_page_debug(cpu, page);
3545         /* if no physical page mapped, return an error */
3546         if (phys_addr == -1)
3547             return -1;
3548         l = (page + TARGET_PAGE_SIZE) - addr;
3549         if (l > len)
3550             l = len;
3551         phys_addr += (addr & ~TARGET_PAGE_MASK);
3552         if (is_write) {
3553             cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3554         } else {
3555             address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3556                              buf, l, 0);
3557         }
3558         len -= l;
3559         buf += l;
3560         addr += l;
3561     }
3562     return 0;
3563 }
3564
3565 /*
3566  * Allows code that needs to deal with migration bitmaps etc to still be built
3567  * target independent.
3568  */
3569 size_t qemu_target_page_bits(void)
3570 {
3571     return TARGET_PAGE_BITS;
3572 }
3573
3574 #endif
3575
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
    bool big_endian = false;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#endif
    return big_endian;
}
3589
3590 #ifndef CONFIG_USER_ONLY
3591 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3592 {
3593     MemoryRegion*mr;
3594     hwaddr l = 1;
3595     bool res;
3596
3597     rcu_read_lock();
3598     mr = address_space_translate(&address_space_memory,
3599                                  phys_addr, &phys_addr, &l, false);
3600
3601     res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3602     rcu_read_unlock();
3603     return res;
3604 }
3605
3606 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3607 {
3608     RAMBlock *block;
3609     int ret = 0;
3610
3611     rcu_read_lock();
3612     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3613         ret = func(block->idstr, block->host, block->offset,
3614                    block->used_length, opaque);
3615         if (ret) {
3616             break;
3617         }
3618     }
3619     rcu_read_unlock();
3620     return ret;
3621 }
3622 #endif