Fix merge of HAXM support
[qemu/ar7.git] / exec.c
blob c4dc14376f74a51cccf0cfbeaba1ae4a06fb2854
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/hax.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include "qemu.h"
41 #else /* !CONFIG_USER_ONLY */
42 #include "hw/hw.h"
43 #include "exec/memory.h"
44 #include "exec/ioport.h"
45 #include "sysemu/dma.h"
46 #include "exec/address-spaces.h"
47 #include "sysemu/xen-mapcache.h"
48 #include "trace.h"
49 #endif
50 #include "exec/cpu-all.h"
51 #include "qemu/rcu_queue.h"
52 #include "qemu/main-loop.h"
53 #include "translate-all.h"
54 #include "sysemu/replay.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
58 #include "exec/log.h"
60 #include "migration/vmstate.h"
62 #include "qemu/range.h"
63 #ifndef _WIN32
64 #include "qemu/mmap-alloc.h"
65 #endif
67 //#define DEBUG_SUBPAGE
69 #if !defined(CONFIG_USER_ONLY)
70 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
71 * are protected by the ramlist lock.
73 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
75 static MemoryRegion *system_memory;
76 static MemoryRegion *system_io;
78 AddressSpace address_space_io;
79 AddressSpace address_space_memory;
81 MemoryRegion io_mem_rom, io_mem_notdirty;
82 static MemoryRegion io_mem_unassigned;
84 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
85 #define RAM_PREALLOC (1 << 0)
87 /* RAM is mmap-ed with MAP_SHARED */
88 #define RAM_SHARED (1 << 1)
90 /* Only a portion of RAM (used_length) is actually used, and migrated.
91 * This used_length size can change across reboots.
93 #define RAM_RESIZEABLE (1 << 2)
95 #endif
97 #ifdef TARGET_PAGE_BITS_VARY
98 int target_page_bits;
99 bool target_page_bits_decided;
100 #endif
102 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
103 /* current CPU in the current thread. It is only valid inside
104 cpu_exec() */
105 __thread CPUState *current_cpu;
106 /* 0 = Do not count executed instructions.
107 1 = Precise instruction counting.
108 2 = Adaptive rate instruction counting. */
109 int use_icount;
111 bool set_preferred_target_page_bits(int bits)
113 /* The target page size is the lowest common denominator for all
114 * the CPUs in the system, so we can only make it smaller, never
115 * larger. And we can't make it smaller once we've committed to
116 * a particular size.
118 #ifdef TARGET_PAGE_BITS_VARY
119 assert(bits >= TARGET_PAGE_BITS_MIN);
120 if (target_page_bits == 0 || target_page_bits > bits) {
121 if (target_page_bits_decided) {
122 return false;
124 target_page_bits = bits;
126 #endif
127 return true;
130 #if !defined(CONFIG_USER_ONLY)
132 static void finalize_target_page_bits(void)
134 #ifdef TARGET_PAGE_BITS_VARY
135 if (target_page_bits == 0) {
136 target_page_bits = TARGET_PAGE_BITS_MIN;
138 target_page_bits_decided = true;
139 #endif
142 typedef struct PhysPageEntry PhysPageEntry;
144 struct PhysPageEntry {
145     /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
146 uint32_t skip : 6;
147 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
148 uint32_t ptr : 26;
151 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
153 /* Size of the L2 (and L3, etc) page tables. */
154 #define ADDR_SPACE_BITS 64
156 #define P_L2_BITS 9
157 #define P_L2_SIZE (1 << P_L2_BITS)
159 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
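/* For example, with ADDR_SPACE_BITS = 64, TARGET_PAGE_BITS = 12 and
 * P_L2_BITS = 9, P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6: six 9-bit
 * levels are enough to cover the 52-bit page frame number.
 */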
161 typedef PhysPageEntry Node[P_L2_SIZE];
163 typedef struct PhysPageMap {
164 struct rcu_head rcu;
166 unsigned sections_nb;
167 unsigned sections_nb_alloc;
168 unsigned nodes_nb;
169 unsigned nodes_nb_alloc;
170 Node *nodes;
171 MemoryRegionSection *sections;
172 } PhysPageMap;
174 struct AddressSpaceDispatch {
175 struct rcu_head rcu;
177 MemoryRegionSection *mru_section;
178 /* This is a multi-level map on the physical address space.
179 * The bottom level has pointers to MemoryRegionSections.
181 PhysPageEntry phys_map;
182 PhysPageMap map;
183 AddressSpace *as;
186 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
187 typedef struct subpage_t {
188 MemoryRegion iomem;
189 AddressSpace *as;
190 hwaddr base;
191 uint16_t sub_section[];
192 } subpage_t;
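/* The flexible sub_section[] array effectively holds one section index
 * per byte offset within the page (indexed via SUBPAGE_IDX), so a single
 * guest page can be split across several MemoryRegionSections.
 */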
194 #define PHYS_SECTION_UNASSIGNED 0
195 #define PHYS_SECTION_NOTDIRTY 1
196 #define PHYS_SECTION_ROM 2
197 #define PHYS_SECTION_WATCH 3
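/* Well-known indices into the sections[] array for the special regions
 * above; the dispatch setup is expected to register them first, in this
 * order, so that these constants remain valid for every AddressSpaceDispatch.
 */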
199 static void io_mem_init(void);
200 static void memory_map_init(void);
201 static void tcg_commit(MemoryListener *listener);
203 static MemoryRegion io_mem_watch;
206 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
207 * @cpu: the CPU whose AddressSpace this is
208 * @as: the AddressSpace itself
209 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
210 * @tcg_as_listener: listener for tracking changes to the AddressSpace
212 struct CPUAddressSpace {
213 CPUState *cpu;
214 AddressSpace *as;
215 struct AddressSpaceDispatch *memory_dispatch;
216 MemoryListener tcg_as_listener;
219 #endif
221 #if !defined(CONFIG_USER_ONLY)
223 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
225 static unsigned alloc_hint = 16;
226 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
227 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
228 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
229 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
230 alloc_hint = map->nodes_nb_alloc;
234 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
236 unsigned i;
237 uint32_t ret;
238 PhysPageEntry e;
239 PhysPageEntry *p;
241 ret = map->nodes_nb++;
242 p = map->nodes[ret];
243 assert(ret != PHYS_MAP_NODE_NIL);
244 assert(ret != map->nodes_nb_alloc);
246 e.skip = leaf ? 0 : 1;
247 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
248 for (i = 0; i < P_L2_SIZE; ++i) {
249 memcpy(&p[i], &e, sizeof(e));
251 return ret;
254 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
255 hwaddr *index, hwaddr *nb, uint16_t leaf,
256 int level)
258 PhysPageEntry *p;
259 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
261 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
262 lp->ptr = phys_map_node_alloc(map, level == 0);
264 p = map->nodes[lp->ptr];
265 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
267 while (*nb && lp < &p[P_L2_SIZE]) {
268 if ((*index & (step - 1)) == 0 && *nb >= step) {
269 lp->skip = 0;
270 lp->ptr = leaf;
271 *index += step;
272 *nb -= step;
273 } else {
274 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
276 ++lp;
280 static void phys_page_set(AddressSpaceDispatch *d,
281 hwaddr index, hwaddr nb,
282 uint16_t leaf)
284 /* Wildly overreserve - it doesn't matter much. */
285 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
287 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
290 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
291 * and update our entry so we can skip it and go directly to the destination.
293 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
295 unsigned valid_ptr = P_L2_SIZE;
296 int valid = 0;
297 PhysPageEntry *p;
298 int i;
300 if (lp->ptr == PHYS_MAP_NODE_NIL) {
301 return;
304 p = nodes[lp->ptr];
305 for (i = 0; i < P_L2_SIZE; i++) {
306 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
307 continue;
310 valid_ptr = i;
311 valid++;
312 if (p[i].skip) {
313 phys_page_compact(&p[i], nodes);
317 /* We can only compress if there's only one child. */
318 if (valid != 1) {
319 return;
322 assert(valid_ptr < P_L2_SIZE);
324 /* Don't compress if it won't fit in the # of bits we have. */
325 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
326 return;
329 lp->ptr = p[valid_ptr].ptr;
330 if (!p[valid_ptr].skip) {
331 /* If our only child is a leaf, make this a leaf. */
332 /* By design, we should have made this node a leaf to begin with so we
333 * should never reach here.
334 * But since it's so simple to handle this, let's do it just in case we
335 * change this rule.
337 lp->skip = 0;
338 } else {
339 lp->skip += p[valid_ptr].skip;
343 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
345 if (d->phys_map.skip) {
346 phys_page_compact(&d->phys_map, d->map.nodes);
350 static inline bool section_covers_addr(const MemoryRegionSection *section,
351 hwaddr addr)
353 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
354 * the section must cover the entire address space.
356 return int128_gethi(section->size) ||
357 range_covers_byte(section->offset_within_address_space,
358 int128_getlo(section->size), addr);
361 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
362 Node *nodes, MemoryRegionSection *sections)
364 PhysPageEntry *p;
365 hwaddr index = addr >> TARGET_PAGE_BITS;
366 int i;
368 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
369 if (lp.ptr == PHYS_MAP_NODE_NIL) {
370 return &sections[PHYS_SECTION_UNASSIGNED];
372 p = nodes[lp.ptr];
373 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
376 if (section_covers_addr(&sections[lp.ptr], addr)) {
377 return &sections[lp.ptr];
378 } else {
379 return &sections[PHYS_SECTION_UNASSIGNED];
383 bool memory_region_is_unassigned(MemoryRegion *mr)
385 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
386 && mr != &io_mem_watch;
389 /* Called from RCU critical section */
390 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
391 hwaddr addr,
392 bool resolve_subpage)
394 MemoryRegionSection *section = atomic_read(&d->mru_section);
395 subpage_t *subpage;
396 bool update;
398 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
399 section_covers_addr(section, addr)) {
400 update = false;
401 } else {
402 section = phys_page_find(d->phys_map, addr, d->map.nodes,
403 d->map.sections);
404 update = true;
406 if (resolve_subpage && section->mr->subpage) {
407 subpage = container_of(section->mr, subpage_t, iomem);
408 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
410 if (update) {
411 atomic_set(&d->mru_section, section);
413 return section;
416 /* Called from RCU critical section */
417 static MemoryRegionSection *
418 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
419 hwaddr *plen, bool resolve_subpage)
421 MemoryRegionSection *section;
422 MemoryRegion *mr;
423 Int128 diff;
425 section = address_space_lookup_region(d, addr, resolve_subpage);
426 /* Compute offset within MemoryRegionSection */
427 addr -= section->offset_within_address_space;
429 /* Compute offset within MemoryRegion */
430 *xlat = addr + section->offset_within_region;
432 mr = section->mr;
434 /* MMIO registers can be expected to perform full-width accesses based only
435 * on their address, without considering adjacent registers that could
436 * decode to completely different MemoryRegions. When such registers
437 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
438 * regions overlap wildly. For this reason we cannot clamp the accesses
439 * here.
441 * If the length is small (as is the case for address_space_ldl/stl),
442 * everything works fine. If the incoming length is large, however,
443 * the caller really has to do the clamping through memory_access_size.
445 if (memory_region_is_ram(mr)) {
446 diff = int128_sub(section->size, int128_make64(addr));
447 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
449 return section;
452 /* Called from RCU critical section */
453 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
454 hwaddr *xlat, hwaddr *plen,
455 bool is_write)
457 IOMMUTLBEntry iotlb;
458 MemoryRegionSection *section;
459 MemoryRegion *mr;
461 for (;;) {
462 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
463 section = address_space_translate_internal(d, addr, &addr, plen, true);
464 mr = section->mr;
466 if (!mr->iommu_ops) {
467 break;
470 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
471 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
472 | (addr & iotlb.addr_mask));
473 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
474 if (!(iotlb.perm & (1 << is_write))) {
475 mr = &io_mem_unassigned;
476 break;
479 as = iotlb.target_as;
482 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
483 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
484 *plen = MIN(page, *plen);
487 *xlat = addr;
488 return mr;
491 /* Called from RCU critical section */
492 MemoryRegionSection *
493 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
494 hwaddr *xlat, hwaddr *plen)
496 MemoryRegionSection *section;
497 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
499 section = address_space_translate_internal(d, addr, xlat, plen, false);
501 assert(!section->mr->iommu_ops);
502 return section;
504 #endif
506 #if !defined(CONFIG_USER_ONLY)
508 static int cpu_common_post_load(void *opaque, int version_id)
510 CPUState *cpu = opaque;
512 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
513 version_id is increased. */
514 cpu->interrupt_request &= ~0x01;
515 tlb_flush(cpu, 1);
517 return 0;
520 static int cpu_common_pre_load(void *opaque)
522 CPUState *cpu = opaque;
524 cpu->exception_index = -1;
526 return 0;
529 static bool cpu_common_exception_index_needed(void *opaque)
531 CPUState *cpu = opaque;
533 return tcg_enabled() && cpu->exception_index != -1;
536 static const VMStateDescription vmstate_cpu_common_exception_index = {
537 .name = "cpu_common/exception_index",
538 .version_id = 1,
539 .minimum_version_id = 1,
540 .needed = cpu_common_exception_index_needed,
541 .fields = (VMStateField[]) {
542 VMSTATE_INT32(exception_index, CPUState),
543 VMSTATE_END_OF_LIST()
547 static bool cpu_common_crash_occurred_needed(void *opaque)
549 CPUState *cpu = opaque;
551 return cpu->crash_occurred;
554 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
555 .name = "cpu_common/crash_occurred",
556 .version_id = 1,
557 .minimum_version_id = 1,
558 .needed = cpu_common_crash_occurred_needed,
559 .fields = (VMStateField[]) {
560 VMSTATE_BOOL(crash_occurred, CPUState),
561 VMSTATE_END_OF_LIST()
565 const VMStateDescription vmstate_cpu_common = {
566 .name = "cpu_common",
567 .version_id = 1,
568 .minimum_version_id = 1,
569 .pre_load = cpu_common_pre_load,
570 .post_load = cpu_common_post_load,
571 .fields = (VMStateField[]) {
572 VMSTATE_UINT32(halted, CPUState),
573 VMSTATE_UINT32(interrupt_request, CPUState),
574 VMSTATE_END_OF_LIST()
576 .subsections = (const VMStateDescription*[]) {
577 &vmstate_cpu_common_exception_index,
578 &vmstate_cpu_common_crash_occurred,
579 NULL
583 #endif
585 CPUState *qemu_get_cpu(int index)
587 CPUState *cpu;
589 CPU_FOREACH(cpu) {
590 if (cpu->cpu_index == index) {
591 return cpu;
595 return NULL;
598 #if !defined(CONFIG_USER_ONLY)
599 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
601 CPUAddressSpace *newas;
603 /* Target code should have set num_ases before calling us */
604 assert(asidx < cpu->num_ases);
606 if (asidx == 0) {
607 /* address space 0 gets the convenience alias */
608 cpu->as = as;
611 /* KVM cannot currently support multiple address spaces. */
612 assert(asidx == 0 || !kvm_enabled());
614 if (!cpu->cpu_ases) {
615 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
618 newas = &cpu->cpu_ases[asidx];
619 newas->cpu = cpu;
620 newas->as = as;
621 if (tcg_enabled()) {
622 newas->tcg_as_listener.commit = tcg_commit;
623 memory_listener_register(&newas->tcg_as_listener, as);
627 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
629 /* Return the AddressSpace corresponding to the specified index */
630 return cpu->cpu_ases[asidx].as;
632 #endif
634 void cpu_exec_unrealizefn(CPUState *cpu)
636 CPUClass *cc = CPU_GET_CLASS(cpu);
638 cpu_list_remove(cpu);
640 if (cc->vmsd != NULL) {
641 vmstate_unregister(NULL, cc->vmsd, cpu);
643 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
644 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
648 void cpu_exec_initfn(CPUState *cpu)
650 #ifdef TARGET_WORDS_BIGENDIAN
651 cpu->bigendian = true;
652 #else
653 cpu->bigendian = false;
654 #endif
655 cpu->as = NULL;
656 cpu->num_ases = 0;
658 #ifndef CONFIG_USER_ONLY
659 cpu->thread_id = qemu_get_thread_id();
661 /* This is a softmmu CPU object, so create a property for it
662 * so users can wire up its memory. (This can't go in qom/cpu.c
663 * because that file is compiled only once for both user-mode
664 * and system builds.) The default if no link is set up is to use
665 * the system address space.
667 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
668 (Object **)&cpu->memory,
669 qdev_prop_allow_set_link_before_realize,
670 OBJ_PROP_LINK_UNREF_ON_RELEASE,
671 &error_abort);
672 cpu->memory = system_memory;
673 object_ref(OBJECT(cpu->memory));
674 #endif
677 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
679 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
681 cpu_list_add(cpu);
683 #ifndef CONFIG_USER_ONLY
684 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
685 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
687 if (cc->vmsd != NULL) {
688 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
690 #endif
693 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
695 /* Flush the whole TB as this will not have race conditions
696 * even if we don't have proper locking yet.
697 * Ideally we would just invalidate the TBs for the
698 * specified PC.
700 tb_flush(cpu);
703 #if defined(CONFIG_USER_ONLY)
704 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
709 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
710 int flags)
712 return -ENOSYS;
715 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
719 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
720 int flags, CPUWatchpoint **watchpoint)
722 return -ENOSYS;
724 #else
725 /* Add a watchpoint. */
726 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
727 int flags, CPUWatchpoint **watchpoint)
729 CPUWatchpoint *wp;
731 /* forbid ranges which are empty or run off the end of the address space */
732 if (len == 0 || (addr + len - 1) < addr) {
733 error_report("tried to set invalid watchpoint at %"
734 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
735 return -EINVAL;
737 wp = g_malloc(sizeof(*wp));
739 wp->vaddr = addr;
740 wp->len = len;
741 wp->flags = flags;
743 /* keep all GDB-injected watchpoints in front */
744 if (flags & BP_GDB) {
745 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
746 } else {
747 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
750 tlb_flush_page(cpu, addr);
752 if (watchpoint)
753 *watchpoint = wp;
754 return 0;
757 /* Remove a specific watchpoint. */
758 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
759 int flags)
761 CPUWatchpoint *wp;
763 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
764 if (addr == wp->vaddr && len == wp->len
765 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
766 cpu_watchpoint_remove_by_ref(cpu, wp);
767 return 0;
770 return -ENOENT;
773 /* Remove a specific watchpoint by reference. */
774 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
776 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
778 tlb_flush_page(cpu, watchpoint->vaddr);
780 g_free(watchpoint);
783 /* Remove all matching watchpoints. */
784 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
786 CPUWatchpoint *wp, *next;
788 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
789 if (wp->flags & mask) {
790 cpu_watchpoint_remove_by_ref(cpu, wp);
795 /* Return true if this watchpoint address matches the specified
796  * access (i.e. the address range covered by the watchpoint overlaps
797 * partially or completely with the address range covered by the
798 * access).
800 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
801 vaddr addr,
802 vaddr len)
804 /* We know the lengths are non-zero, but a little caution is
805 * required to avoid errors in the case where the range ends
806 * exactly at the top of the address space and so addr + len
807 * wraps round to zero.
809 vaddr wpend = wp->vaddr + wp->len - 1;
810 vaddr addrend = addr + len - 1;
812 return !(addr > wpend || wp->vaddr > addrend);
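    /* Example: a 4-byte watchpoint at 0x1000 (wpend = 0x1003) matches a
     * 2-byte access at 0x1002 (addrend = 0x1003) but not one at 0x1004.
     */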
815 #endif
817 /* Add a breakpoint. */
818 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
819 CPUBreakpoint **breakpoint)
821 CPUBreakpoint *bp;
823 bp = g_malloc(sizeof(*bp));
825 bp->pc = pc;
826 bp->flags = flags;
828 /* keep all GDB-injected breakpoints in front */
829 if (flags & BP_GDB) {
830 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
831 } else {
832 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
835 breakpoint_invalidate(cpu, pc);
837 if (breakpoint) {
838 *breakpoint = bp;
840 return 0;
843 /* Remove a specific breakpoint. */
844 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
846 CPUBreakpoint *bp;
848 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
849 if (bp->pc == pc && bp->flags == flags) {
850 cpu_breakpoint_remove_by_ref(cpu, bp);
851 return 0;
854 return -ENOENT;
857 /* Remove a specific breakpoint by reference. */
858 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
860 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
862 breakpoint_invalidate(cpu, breakpoint->pc);
864 g_free(breakpoint);
867 /* Remove all matching breakpoints. */
868 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
870 CPUBreakpoint *bp, *next;
872 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
873 if (bp->flags & mask) {
874 cpu_breakpoint_remove_by_ref(cpu, bp);
879 /* enable or disable single step mode. EXCP_DEBUG is returned by the
880 CPU loop after each instruction */
881 void cpu_single_step(CPUState *cpu, int enabled)
883 if (cpu->singlestep_enabled != enabled) {
884 cpu->singlestep_enabled = enabled;
885 if (kvm_enabled()) {
886 kvm_update_guest_debug(cpu, 0);
887 } else {
888 /* must flush all the translated code to avoid inconsistencies */
889 /* XXX: only flush what is necessary */
890 tb_flush(cpu);
895 void QEMU_NORETURN cpu_abort(CPUState *cpu, const char *fmt, ...)
897 va_list ap;
898 va_list ap2;
900 va_start(ap, fmt);
901 va_copy(ap2, ap);
902 fprintf(stderr, "qemu: fatal: ");
903 vfprintf(stderr, fmt, ap);
904 fprintf(stderr, "\n");
905 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
906 if (qemu_log_separate()) {
907 qemu_log_lock();
908 qemu_log("qemu: fatal: ");
909 qemu_log_vprintf(fmt, ap2);
910 qemu_log("\n");
911 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
912 qemu_log_flush();
913 qemu_log_unlock();
914 qemu_log_close();
916 va_end(ap2);
917 va_end(ap);
918 replay_finish();
919 #if defined(CONFIG_USER_ONLY)
921 struct sigaction act;
922 sigfillset(&act.sa_mask);
923 act.sa_handler = SIG_DFL;
924 sigaction(SIGABRT, &act, NULL);
926 #endif
927 abort();
930 #if !defined(CONFIG_USER_ONLY)
931 /* Called from RCU critical section */
932 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
934 RAMBlock *block;
936 block = atomic_rcu_read(&ram_list.mru_block);
937 if (block && addr - block->offset < block->max_length) {
938 return block;
940 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
941 if (addr - block->offset < block->max_length) {
942 goto found;
946 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
947 abort();
949 found:
950 /* It is safe to write mru_block outside the iothread lock. This
951 * is what happens:
953 * mru_block = xxx
954 * rcu_read_unlock()
955 * xxx removed from list
956 * rcu_read_lock()
957 * read mru_block
958 * mru_block = NULL;
959 * call_rcu(reclaim_ramblock, xxx);
960 * rcu_read_unlock()
962 * atomic_rcu_set is not needed here. The block was already published
963 * when it was placed into the list. Here we're just making an extra
964 * copy of the pointer.
966 ram_list.mru_block = block;
967 return block;
970 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
972 CPUState *cpu;
973 ram_addr_t start1;
974 RAMBlock *block;
975 ram_addr_t end;
977 end = TARGET_PAGE_ALIGN(start + length);
978 start &= TARGET_PAGE_MASK;
980 rcu_read_lock();
981 block = qemu_get_ram_block(start);
982 assert(block == qemu_get_ram_block(end - 1));
983 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
984 CPU_FOREACH(cpu) {
985 tlb_reset_dirty(cpu, start1, length);
987 rcu_read_unlock();
990 /* Note: start and end must be within the same ram block. */
991 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
992 ram_addr_t length,
993 unsigned client)
995 DirtyMemoryBlocks *blocks;
996 unsigned long end, page;
997 bool dirty = false;
999 if (length == 0) {
1000 return false;
1003 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1004 page = start >> TARGET_PAGE_BITS;
1006 rcu_read_lock();
1008 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1010 while (page < end) {
1011 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1012 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1013 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1015 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1016 offset, num);
1017 page += num;
1020 rcu_read_unlock();
1022 if (dirty && tcg_enabled()) {
1023 tlb_reset_dirty_range_all(start, length);
1026 return dirty;
1029 /* Called from RCU critical section */
1030 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1031 MemoryRegionSection *section,
1032 target_ulong vaddr,
1033 hwaddr paddr, hwaddr xlat,
1034 int prot,
1035 target_ulong *address)
1037 hwaddr iotlb;
1038 CPUWatchpoint *wp;
1040 if (memory_region_is_ram(section->mr)) {
1041 /* Normal RAM. */
1042 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1043 if (!section->readonly) {
1044 iotlb |= PHYS_SECTION_NOTDIRTY;
1045 } else {
1046 iotlb |= PHYS_SECTION_ROM;
1048 } else {
1049 AddressSpaceDispatch *d;
1051 d = atomic_rcu_read(&section->address_space->dispatch);
1052 iotlb = section - d->map.sections;
1053 iotlb += xlat;
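    /* Either way, the low TARGET_PAGE_BITS of iotlb hold a section index
     * (PHYS_SECTION_NOTDIRTY/ROM for RAM, the real section index for MMIO)
     * while the page-aligned upper bits carry the address to add back in;
     * see the assertion in phys_section_add() below.
     */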
1056 /* Make accesses to pages with watchpoints go via the
1057 watchpoint trap routines. */
1058 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1059 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1060 /* Avoid trapping reads of pages with a write breakpoint. */
1061 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1062 iotlb = PHYS_SECTION_WATCH + paddr;
1063 *address |= TLB_MMIO;
1064 break;
1069 return iotlb;
1071 #endif /* defined(CONFIG_USER_ONLY) */
1073 #if !defined(CONFIG_USER_ONLY)
1075 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1076 uint16_t section);
1077 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1079 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1080 qemu_anon_ram_alloc;
1083  * Set a custom physical guest memory allocator.
1084 * Accelerators with unusual needs may need this. Hopefully, we can
1085 * get rid of it eventually.
1087 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1089 phys_mem_alloc = alloc;
1092 static uint16_t phys_section_add(PhysPageMap *map,
1093 MemoryRegionSection *section)
1095 /* The physical section number is ORed with a page-aligned
1096 * pointer to produce the iotlb entries. Thus it should
1097 * never overflow into the page-aligned value.
1099 assert(map->sections_nb < TARGET_PAGE_SIZE);
1101 if (map->sections_nb == map->sections_nb_alloc) {
1102 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1103 map->sections = g_renew(MemoryRegionSection, map->sections,
1104 map->sections_nb_alloc);
1106 map->sections[map->sections_nb] = *section;
1107 memory_region_ref(section->mr);
1108 return map->sections_nb++;
1111 static void phys_section_destroy(MemoryRegion *mr)
1113 bool have_sub_page = mr->subpage;
1115 memory_region_unref(mr);
1117 if (have_sub_page) {
1118 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1119 object_unref(OBJECT(&subpage->iomem));
1120 g_free(subpage);
1124 static void phys_sections_free(PhysPageMap *map)
1126 while (map->sections_nb > 0) {
1127 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1128 phys_section_destroy(section->mr);
1130 g_free(map->sections);
1131 g_free(map->nodes);
1134 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1136 subpage_t *subpage;
1137 hwaddr base = section->offset_within_address_space
1138 & TARGET_PAGE_MASK;
1139 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1140 d->map.nodes, d->map.sections);
1141 MemoryRegionSection subsection = {
1142 .offset_within_address_space = base,
1143 .size = int128_make64(TARGET_PAGE_SIZE),
1145 hwaddr start, end;
1147 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1149 if (!(existing->mr->subpage)) {
1150 subpage = subpage_init(d->as, base);
1151 subsection.address_space = d->as;
1152 subsection.mr = &subpage->iomem;
1153 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1154 phys_section_add(&d->map, &subsection));
1155 } else {
1156 subpage = container_of(existing->mr, subpage_t, iomem);
1158 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1159 end = start + int128_get64(section->size) - 1;
1160 subpage_register(subpage, start, end,
1161 phys_section_add(&d->map, section));
1165 static void register_multipage(AddressSpaceDispatch *d,
1166 MemoryRegionSection *section)
1168 hwaddr start_addr = section->offset_within_address_space;
1169 uint16_t section_index = phys_section_add(&d->map, section);
1170 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1171 TARGET_PAGE_BITS));
1173 assert(num_pages);
1174 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
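/* mem_add() below splits an incoming section into an optional unaligned
 * head registered as a subpage, a run of whole target pages handled by
 * register_multipage(), and an optional unaligned tail that is again
 * registered as a subpage.
 */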
1177 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1179 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1180 AddressSpaceDispatch *d = as->next_dispatch;
1181 MemoryRegionSection now = *section, remain = *section;
1182 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1184 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1185 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1186 - now.offset_within_address_space;
1188 now.size = int128_min(int128_make64(left), now.size);
1189 register_subpage(d, &now);
1190 } else {
1191 now.size = int128_zero();
1193 while (int128_ne(remain.size, now.size)) {
1194 remain.size = int128_sub(remain.size, now.size);
1195 remain.offset_within_address_space += int128_get64(now.size);
1196 remain.offset_within_region += int128_get64(now.size);
1197 now = remain;
1198 if (int128_lt(remain.size, page_size)) {
1199 register_subpage(d, &now);
1200 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1201 now.size = page_size;
1202 register_subpage(d, &now);
1203 } else {
1204 now.size = int128_and(now.size, int128_neg(page_size));
1205 register_multipage(d, &now);
1210 void qemu_flush_coalesced_mmio_buffer(void)
1212 if (kvm_enabled())
1213 kvm_flush_coalesced_mmio_buffer();
1216 void qemu_mutex_lock_ramlist(void)
1218 qemu_mutex_lock(&ram_list.mutex);
1221 void qemu_mutex_unlock_ramlist(void)
1223 qemu_mutex_unlock(&ram_list.mutex);
1226 #ifdef __linux__
1227 static int64_t get_file_size(int fd)
1229 int64_t size = lseek(fd, 0, SEEK_END);
1230 if (size < 0) {
1231 return -errno;
1233 return size;
1236 static void *file_ram_alloc(RAMBlock *block,
1237 ram_addr_t memory,
1238 const char *path,
1239 Error **errp)
1241 bool unlink_on_error = false;
1242 char *filename;
1243 char *sanitized_name;
1244 char *c;
1245 void * volatile area = MAP_FAILED;
1246 int fd = -1;
1247 int64_t file_size;
1249 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1250 error_setg(errp,
1251 "host lacks kvm mmu notifiers, -mem-path unsupported");
1252 return NULL;
1255 for (;;) {
1256 fd = open(path, O_RDWR);
1257 if (fd >= 0) {
1258 /* @path names an existing file, use it */
1259 break;
1261 if (errno == ENOENT) {
1262 /* @path names a file that doesn't exist, create it */
1263 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1264 if (fd >= 0) {
1265 unlink_on_error = true;
1266 break;
1268 } else if (errno == EISDIR) {
1269 /* @path names a directory, create a file there */
1270 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1271 sanitized_name = g_strdup(memory_region_name(block->mr));
1272 for (c = sanitized_name; *c != '\0'; c++) {
1273 if (*c == '/') {
1274 *c = '_';
1278 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1279 sanitized_name);
1280 g_free(sanitized_name);
1282 fd = mkstemp(filename);
1283 if (fd >= 0) {
1284 unlink(filename);
1285 g_free(filename);
1286 break;
1288 g_free(filename);
1290 if (errno != EEXIST && errno != EINTR) {
1291 error_setg_errno(errp, errno,
1292 "can't open backing store %s for guest RAM",
1293 path);
1294 goto error;
1297 * Try again on EINTR and EEXIST. The latter happens when
1298 * something else creates the file between our two open().
1302 block->page_size = qemu_fd_getpagesize(fd);
1303 block->mr->align = block->page_size;
1304 #if defined(__s390x__)
1305 if (kvm_enabled()) {
1306 block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1308 #endif
1310 file_size = get_file_size(fd);
1312 if (memory < block->page_size) {
1313 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1314 "or larger than page size 0x%zx",
1315 memory, block->page_size);
1316 goto error;
1319 if (file_size > 0 && file_size < memory) {
1320 error_setg(errp, "backing store %s size 0x%" PRIx64
1321 " does not match 'size' option 0x" RAM_ADDR_FMT,
1322 path, file_size, memory);
1323 goto error;
1326 memory = ROUND_UP(memory, block->page_size);
1329 * ftruncate is not supported by hugetlbfs in older
1330 * hosts, so don't bother bailing out on errors.
1331 * If anything goes wrong with it under other filesystems,
1332 * mmap will fail.
1334 * Do not truncate the non-empty backend file to avoid corrupting
1335 * the existing data in the file. Disabling shrinking is not
1336 * enough. For example, the current vNVDIMM implementation stores
1337 * the guest NVDIMM labels at the end of the backend file. If the
1338 * backend file is later extended, QEMU will not be able to find
1339 * those labels. Therefore, extending the non-empty backend file
1340 * is disabled as well.
1342 if (!file_size && ftruncate(fd, memory)) {
1343 perror("ftruncate");
1346 area = qemu_ram_mmap(fd, memory, block->mr->align,
1347 block->flags & RAM_SHARED);
1348 if (area == MAP_FAILED) {
1349 error_setg_errno(errp, errno,
1350 "unable to map backing store for guest RAM");
1351 goto error;
1354 if (mem_prealloc) {
1355 os_mem_prealloc(fd, area, memory, errp);
1356 if (errp && *errp) {
1357 goto error;
1361 block->fd = fd;
1362 return area;
1364 error:
1365 if (area != MAP_FAILED) {
1366 qemu_ram_munmap(area, memory);
1368 if (unlink_on_error) {
1369 unlink(path);
1371 if (fd != -1) {
1372 close(fd);
1374 return NULL;
1376 #endif
1378 /* Called with the ramlist lock held. */
1379 static ram_addr_t find_ram_offset(ram_addr_t size)
1381 RAMBlock *block, *next_block;
1382 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1384     assert(size != 0); /* it would hand out the same offset multiple times */
1386 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1387 return 0;
1390 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1391 ram_addr_t end, next = RAM_ADDR_MAX;
1393 end = block->offset + block->max_length;
1395 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1396 if (next_block->offset >= end) {
1397 next = MIN(next, next_block->offset);
1400 if (next - end >= size && next - end < mingap) {
1401 offset = end;
1402 mingap = next - end;
1406 if (offset == RAM_ADDR_MAX) {
1407 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1408 (uint64_t)size);
1409 abort();
1412 return offset;
1415 ram_addr_t last_ram_offset(void)
1417 RAMBlock *block;
1418 ram_addr_t last = 0;
1420 rcu_read_lock();
1421 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1422 last = MAX(last, block->offset + block->max_length);
1424 rcu_read_unlock();
1425 return last;
1428 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1430 int ret;
1432 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1433 if (!machine_dump_guest_core(current_machine)) {
1434 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1435 if (ret) {
1436 perror("qemu_madvise");
1437 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1438 "but dump_guest_core=off specified\n");
1443 const char *qemu_ram_get_idstr(RAMBlock *rb)
1445 return rb->idstr;
1448 /* Called with iothread lock held. */
1449 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1451 RAMBlock *block;
1453 assert(new_block);
1454 assert(!new_block->idstr[0]);
1456 if (dev) {
1457 char *id = qdev_get_dev_path(dev);
1458 if (id) {
1459 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1460 g_free(id);
1463 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1465 rcu_read_lock();
1466 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1467 if (block != new_block &&
1468 !strcmp(block->idstr, new_block->idstr)) {
1469 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1470 new_block->idstr);
1471 abort();
1474 rcu_read_unlock();
1477 /* Called with iothread lock held. */
1478 void qemu_ram_unset_idstr(RAMBlock *block)
1480 /* FIXME: arch_init.c assumes that this is not called throughout
1481 * migration. Ignore the problem since hot-unplug during migration
1482 * does not work anyway.
1484 if (block) {
1485 memset(block->idstr, 0, sizeof(block->idstr));
1489 size_t qemu_ram_pagesize(RAMBlock *rb)
1491 return rb->page_size;
1494 static int memory_try_enable_merging(void *addr, size_t len)
1496 if (!machine_mem_merge(current_machine)) {
1497 /* disabled by the user */
1498 return 0;
1501 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1504 /* Only legal before the guest might have detected the memory size: e.g. on
1505  * incoming migration, or right after reset.
1507  * As the memory core doesn't know how the memory is accessed, it is up to
1508  * the resize callback to update device state and/or add assertions to detect
1509  * misuse, if necessary.
1511 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1513 assert(block);
1515 newsize = HOST_PAGE_ALIGN(newsize);
1517 if (block->used_length == newsize) {
1518 return 0;
1521 if (!(block->flags & RAM_RESIZEABLE)) {
1522 error_setg_errno(errp, EINVAL,
1523 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1524 " in != 0x" RAM_ADDR_FMT, block->idstr,
1525 newsize, block->used_length);
1526 return -EINVAL;
1529 if (block->max_length < newsize) {
1530 error_setg_errno(errp, EINVAL,
1531 "Length too large: %s: 0x" RAM_ADDR_FMT
1532 " > 0x" RAM_ADDR_FMT, block->idstr,
1533 newsize, block->max_length);
1534 return -EINVAL;
1537 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1538 block->used_length = newsize;
1539 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1540 DIRTY_CLIENTS_ALL);
1541 memory_region_set_size(block->mr, newsize);
1542 if (block->resized) {
1543 block->resized(block->idstr, newsize, block->host);
1545 return 0;
1548 /* Called with ram_list.mutex held */
1549 static void dirty_memory_extend(ram_addr_t old_ram_size,
1550 ram_addr_t new_ram_size)
1552 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1553 DIRTY_MEMORY_BLOCK_SIZE);
1554 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1555 DIRTY_MEMORY_BLOCK_SIZE);
1556 int i;
1558 /* Only need to extend if block count increased */
1559 if (new_num_blocks <= old_num_blocks) {
1560 return;
1563 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1564 DirtyMemoryBlocks *old_blocks;
1565 DirtyMemoryBlocks *new_blocks;
1566 int j;
1568 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1569 new_blocks = g_malloc(sizeof(*new_blocks) +
1570 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1572 if (old_num_blocks) {
1573 memcpy(new_blocks->blocks, old_blocks->blocks,
1574 old_num_blocks * sizeof(old_blocks->blocks[0]));
1577 for (j = old_num_blocks; j < new_num_blocks; j++) {
1578 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1581 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1583 if (old_blocks) {
1584 g_free_rcu(old_blocks, rcu);
1589 static void ram_block_add(RAMBlock *new_block, Error **errp)
1591 RAMBlock *block;
1592 RAMBlock *last_block = NULL;
1593 ram_addr_t old_ram_size, new_ram_size;
1594 Error *err = NULL;
1596 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1598 qemu_mutex_lock_ramlist();
1599 new_block->offset = find_ram_offset(new_block->max_length);
1601 if (!new_block->host) {
1602 if (xen_enabled()) {
1603 xen_ram_alloc(new_block->offset, new_block->max_length,
1604 new_block->mr, &err);
1605 if (err) {
1606 error_propagate(errp, err);
1607 qemu_mutex_unlock_ramlist();
1608 return;
1610 } else {
1611 new_block->host = phys_mem_alloc(new_block->max_length,
1612 &new_block->mr->align);
1613 if (!new_block->host) {
1614 error_setg_errno(errp, errno,
1615 "cannot set up guest memory '%s'",
1616 memory_region_name(new_block->mr));
1617 qemu_mutex_unlock_ramlist();
1618 return;
1621              * With HAX, QEMU allocates the virtual address range and the HAX
1622              * kernel module populates it with physical memory. There is currently
1623              * no paging, so the user should ensure enough free memory is available in advance.
1625 if (hax_enabled()) {
1626 int ret;
1627 ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host,
1628 new_block->max_length);
1629 if (ret < 0) {
1630 error_setg(errp, "Hax failed to populate ram");
1631 return;
1635 memory_try_enable_merging(new_block->host, new_block->max_length);
1639 new_ram_size = MAX(old_ram_size,
1640 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1641 if (new_ram_size > old_ram_size) {
1642 migration_bitmap_extend(old_ram_size, new_ram_size);
1643 dirty_memory_extend(old_ram_size, new_ram_size);
1645 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1646 * QLIST (which has an RCU-friendly variant) does not have insertion at
1647 * tail, so save the last element in last_block.
1649 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1650 last_block = block;
1651 if (block->max_length < new_block->max_length) {
1652 break;
1655 if (block) {
1656 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1657 } else if (last_block) {
1658 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1659 } else { /* list is empty */
1660 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1662 ram_list.mru_block = NULL;
1664 /* Write list before version */
1665 smp_wmb();
1666 ram_list.version++;
1667 qemu_mutex_unlock_ramlist();
1669 cpu_physical_memory_set_dirty_range(new_block->offset,
1670 new_block->used_length,
1671 DIRTY_CLIENTS_ALL);
1673 if (new_block->host) {
1674 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1675 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1676 /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1677 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1681 #ifdef __linux__
1682 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1683 bool share, const char *mem_path,
1684 Error **errp)
1686 RAMBlock *new_block;
1687 Error *local_err = NULL;
1689 if (xen_enabled()) {
1690 error_setg(errp, "-mem-path not supported with Xen");
1691 return NULL;
1694 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1696 * file_ram_alloc() needs to allocate just like
1697 * phys_mem_alloc, but we haven't bothered to provide
1698 * a hook there.
1700 error_setg(errp,
1701 "-mem-path not supported with this accelerator");
1702 return NULL;
1705 size = HOST_PAGE_ALIGN(size);
1706 new_block = g_malloc0(sizeof(*new_block));
1707 new_block->mr = mr;
1708 new_block->used_length = size;
1709 new_block->max_length = size;
1710 new_block->flags = share ? RAM_SHARED : 0;
1711 new_block->host = file_ram_alloc(new_block, size,
1712 mem_path, errp);
1713 if (!new_block->host) {
1714 g_free(new_block);
1715 return NULL;
1718 ram_block_add(new_block, &local_err);
1719 if (local_err) {
1720 g_free(new_block);
1721 error_propagate(errp, local_err);
1722 return NULL;
1724 return new_block;
1726 #endif
1728 static
1729 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1730 void (*resized)(const char*,
1731 uint64_t length,
1732 void *host),
1733 void *host, bool resizeable,
1734 MemoryRegion *mr, Error **errp)
1736 RAMBlock *new_block;
1737 Error *local_err = NULL;
1739 size = HOST_PAGE_ALIGN(size);
1740 max_size = HOST_PAGE_ALIGN(max_size);
1741 new_block = g_malloc0(sizeof(*new_block));
1742 new_block->mr = mr;
1743 new_block->resized = resized;
1744 new_block->used_length = size;
1745 new_block->max_length = max_size;
1746 assert(max_size >= size);
1747 new_block->fd = -1;
1748 new_block->page_size = getpagesize();
1749 new_block->host = host;
1750 if (host) {
1751 new_block->flags |= RAM_PREALLOC;
1753 if (resizeable) {
1754 new_block->flags |= RAM_RESIZEABLE;
1756 ram_block_add(new_block, &local_err);
1757 if (local_err) {
1758 g_free(new_block);
1759 error_propagate(errp, local_err);
1760 return NULL;
1762 return new_block;
1765 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1766 MemoryRegion *mr, Error **errp)
1768 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1771 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1773 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1776 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1777 void (*resized)(const char*,
1778 uint64_t length,
1779 void *host),
1780 MemoryRegion *mr, Error **errp)
1782 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1785 static void reclaim_ramblock(RAMBlock *block)
1787 if (block->flags & RAM_PREALLOC) {
1789 } else if (xen_enabled()) {
1790 xen_invalidate_map_cache_entry(block->host);
1791 #ifndef _WIN32
1792 } else if (block->fd >= 0) {
1793 qemu_ram_munmap(block->host, block->max_length);
1794 close(block->fd);
1795 #endif
1796 } else {
1797 qemu_anon_ram_free(block->host, block->max_length);
1799 g_free(block);
1802 void qemu_ram_free(RAMBlock *block)
1804 if (!block) {
1805 return;
1808 qemu_mutex_lock_ramlist();
1809 QLIST_REMOVE_RCU(block, next);
1810 ram_list.mru_block = NULL;
1811 /* Write list before version */
1812 smp_wmb();
1813 ram_list.version++;
1814 call_rcu(block, reclaim_ramblock, rcu);
1815 qemu_mutex_unlock_ramlist();
1818 #ifndef _WIN32
1819 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1821 RAMBlock *block;
1822 ram_addr_t offset;
1823 int flags;
1824 void *area, *vaddr;
1826 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1827 offset = addr - block->offset;
1828 if (offset < block->max_length) {
1829 vaddr = ramblock_ptr(block, offset);
1830 if (block->flags & RAM_PREALLOC) {
1832 } else if (xen_enabled()) {
1833 abort();
1834 } else {
1835 flags = MAP_FIXED;
1836 if (block->fd >= 0) {
1837 flags |= (block->flags & RAM_SHARED ?
1838 MAP_SHARED : MAP_PRIVATE);
1839 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1840 flags, block->fd, offset);
1841 } else {
1843 * Remap needs to match alloc. Accelerators that
1844 * set phys_mem_alloc never remap. If they did,
1845 * we'd need a remap hook here.
1847 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1849 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1850 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1851 flags, -1, 0);
1853 if (area != vaddr) {
1854 fprintf(stderr, "Could not remap addr: "
1855 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1856 length, addr);
1857 exit(1);
1859 memory_try_enable_merging(vaddr, length);
1860 qemu_ram_setup_dump(vaddr, length);
1865 #endif /* !_WIN32 */
1867 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1868 * This should not be used for general purpose DMA. Use address_space_map
1869 * or address_space_rw instead. For local memory (e.g. video ram) that the
1870 * device owns, use memory_region_get_ram_ptr.
1872 * Called within RCU critical section.
1874 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1876 RAMBlock *block = ram_block;
1878 if (block == NULL) {
1879 block = qemu_get_ram_block(addr);
1880 addr -= block->offset;
1883 if (xen_enabled() && block->host == NULL) {
1884 /* We need to check if the requested address is in the RAM
1885 * because we don't want to map the entire memory in QEMU.
1886 * In that case just map until the end of the page.
1888 if (block->offset == 0) {
1889 return xen_map_cache(addr, 0, 0);
1892 block->host = xen_map_cache(block->offset, block->max_length, 1);
1894 return ramblock_ptr(block, addr);
1897 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1898 * but takes a size argument.
1900 * Called within RCU critical section.
1902 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1903 hwaddr *size)
1905 RAMBlock *block = ram_block;
1906 if (*size == 0) {
1907 return NULL;
1910 if (block == NULL) {
1911 block = qemu_get_ram_block(addr);
1912 addr -= block->offset;
1914 *size = MIN(*size, block->max_length - addr);
1916 if (xen_enabled() && block->host == NULL) {
1917 /* We need to check if the requested address is in the RAM
1918 * because we don't want to map the entire memory in QEMU.
1919 * In that case just map the requested area.
1921 if (block->offset == 0) {
1922 return xen_map_cache(addr, *size, 1);
1925 block->host = xen_map_cache(block->offset, block->max_length, 1);
1928 return ramblock_ptr(block, addr);
1932 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1933 * in that RAMBlock.
1935 * ptr: Host pointer to look up
1936 * round_offset: If true round the result offset down to a page boundary
1937 * *ram_addr: set to result ram_addr
1938 * *offset: set to result offset within the RAMBlock
1940 * Returns: RAMBlock (or NULL if not found)
1942 * By the time this function returns, the returned pointer is not protected
1943 * by RCU anymore. If the caller is not within an RCU critical section and
1944 * does not hold the iothread lock, it must have other means of protecting the
1945 * pointer, such as a reference to the region that includes the incoming
1946 * ram_addr_t.
1948 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1949 ram_addr_t *offset)
1951 RAMBlock *block;
1952 uint8_t *host = ptr;
1954 if (xen_enabled()) {
1955 ram_addr_t ram_addr;
1956 rcu_read_lock();
1957 ram_addr = xen_ram_addr_from_mapcache(ptr);
1958 block = qemu_get_ram_block(ram_addr);
1959 if (block) {
1960 *offset = ram_addr - block->offset;
1962 rcu_read_unlock();
1963 return block;
1966 rcu_read_lock();
1967 block = atomic_rcu_read(&ram_list.mru_block);
1968 if (block && block->host && host - block->host < block->max_length) {
1969 goto found;
1972 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1973         /* This case appears when the block is not mapped. */
1974 if (block->host == NULL) {
1975 continue;
1977 if (host - block->host < block->max_length) {
1978 goto found;
1982 rcu_read_unlock();
1983 return NULL;
1985 found:
1986 *offset = (host - block->host);
1987 if (round_offset) {
1988 *offset &= TARGET_PAGE_MASK;
1990 rcu_read_unlock();
1991 return block;
1995 * Finds the named RAMBlock
1997 * name: The name of RAMBlock to find
1999 * Returns: RAMBlock (or NULL if not found)
2001 RAMBlock *qemu_ram_block_by_name(const char *name)
2003 RAMBlock *block;
2005 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2006 if (!strcmp(name, block->idstr)) {
2007 return block;
2011 return NULL;
2014 /* Some of the softmmu routines need to translate from a host pointer
2015 (typically a TLB entry) back to a ram offset. */
2016 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2018 RAMBlock *block;
2019 ram_addr_t offset;
2021 block = qemu_ram_block_from_host(ptr, false, &offset);
2022 if (!block) {
2023 return RAM_ADDR_INVALID;
2026 return block->offset + offset;
2029 /* Called within RCU critical section. */
2030 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2031 uint64_t val, unsigned size)
2033 bool locked = false;
2035 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2036 locked = true;
2037 tb_lock();
2038 tb_invalidate_phys_page_fast(ram_addr, size);
2040 switch (size) {
2041 case 1:
2042 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2043 break;
2044 case 2:
2045 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2046 break;
2047 case 4:
2048 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2049 break;
2050 default:
2051 abort();
2054 if (locked) {
2055 tb_unlock();
2058 /* Set both VGA and migration bits for simplicity and to remove
2059 * the notdirty callback faster.
2061 cpu_physical_memory_set_dirty_range(ram_addr, size,
2062 DIRTY_CLIENTS_NOCODE);
2063 /* we remove the notdirty callback only if the code has been
2064 flushed */
2065 if (!cpu_physical_memory_is_clean(ram_addr)) {
2066 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2070 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2071 unsigned size, bool is_write)
2073 return is_write;
2076 static const MemoryRegionOps notdirty_mem_ops = {
2077 .write = notdirty_mem_write,
2078 .valid.accepts = notdirty_mem_accepts,
2079 .endianness = DEVICE_NATIVE_ENDIAN,
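/* Reads of pages with clean code never come here: the TLB only flags the
 * write path, so reads go straight to RAM. Only writes are trapped, letting
 * the page be marked dirty and any TBs derived from it be invalidated
 * before the store lands.
 */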
2082 /* Generate a debug exception if a watchpoint has been hit. */
2083 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2085 CPUState *cpu = current_cpu;
2086 CPUClass *cc = CPU_GET_CLASS(cpu);
2087 CPUArchState *env = cpu->env_ptr;
2088 target_ulong pc, cs_base;
2089 target_ulong vaddr;
2090 CPUWatchpoint *wp;
2091 uint32_t cpu_flags;
2093 if (cpu->watchpoint_hit) {
2094 /* We re-entered the check after replacing the TB. Now raise
2095          * the debug interrupt so that it will trigger after the
2096 * current instruction. */
2097 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2098 return;
2100 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2101 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2102 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2103 && (wp->flags & flags)) {
2104 if (flags == BP_MEM_READ) {
2105 wp->flags |= BP_WATCHPOINT_HIT_READ;
2106 } else {
2107 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2109 wp->hitaddr = vaddr;
2110 wp->hitattrs = attrs;
2111 if (!cpu->watchpoint_hit) {
2112 if (wp->flags & BP_CPU &&
2113 !cc->debug_check_watchpoint(cpu, wp)) {
2114 wp->flags &= ~BP_WATCHPOINT_HIT;
2115 continue;
2117 cpu->watchpoint_hit = wp;
2119 /* The tb_lock will be reset when cpu_loop_exit or
2120 * cpu_loop_exit_noexc longjmp back into the cpu_exec
2121 * main loop.
2123 tb_lock();
2124 tb_check_watchpoint(cpu);
2125 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2126 cpu->exception_index = EXCP_DEBUG;
2127 cpu_loop_exit(cpu);
2128 } else {
2129 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2130 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2131 cpu_loop_exit_noexc(cpu);
2134 } else {
2135 wp->flags &= ~BP_WATCHPOINT_HIT;
2140 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2141 so these check for a hit then pass through to the normal out-of-line
2142 phys routines. */
2143 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2144 unsigned size, MemTxAttrs attrs)
2146 MemTxResult res;
2147 uint64_t data;
2148 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2149 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2151 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2152 switch (size) {
2153 case 1:
2154 data = address_space_ldub(as, addr, attrs, &res);
2155 break;
2156 case 2:
2157 data = address_space_lduw(as, addr, attrs, &res);
2158 break;
2159 case 4:
2160 data = address_space_ldl(as, addr, attrs, &res);
2161 break;
2162 default: abort();
2164 *pdata = data;
2165 return res;
2168 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2169 uint64_t val, unsigned size,
2170 MemTxAttrs attrs)
2172 MemTxResult res;
2173 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2174 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2176 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2177 switch (size) {
2178 case 1:
2179 address_space_stb(as, addr, val, attrs, &res);
2180 break;
2181 case 2:
2182 address_space_stw(as, addr, val, attrs, &res);
2183 break;
2184 case 4:
2185 address_space_stl(as, addr, val, attrs, &res);
2186 break;
2187 default: abort();
2189 return res;
2192 static const MemoryRegionOps watch_mem_ops = {
2193 .read_with_attrs = watch_mem_read,
2194 .write_with_attrs = watch_mem_write,
2195 .endianness = DEVICE_NATIVE_ENDIAN,
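/* Pages containing at least one watchpoint are routed to io_mem_watch via
 * the TLB, so every load/store on them funnels through the handlers above
 * before being forwarded to the real address space.  A minimal sketch of
 * arming one (assuming the cpu_watchpoint_insert() prototype declared in
 * include/qom/cpu.h; "vaddr_of_interest" is a placeholder):
 *
 *     CPUWatchpoint *wp;
 *     cpu_watchpoint_insert(cpu, vaddr_of_interest, 4,
 *                           BP_MEM_WRITE | BP_GDB, &wp);
 */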
2198 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2199 unsigned len, MemTxAttrs attrs)
2201 subpage_t *subpage = opaque;
2202 uint8_t buf[8];
2203 MemTxResult res;
2205 #if defined(DEBUG_SUBPAGE)
2206 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2207 subpage, len, addr);
2208 #endif
2209 res = address_space_read(subpage->as, addr + subpage->base,
2210 attrs, buf, len);
2211 if (res) {
2212 return res;
2214 switch (len) {
2215 case 1:
2216 *data = ldub_p(buf);
2217 return MEMTX_OK;
2218 case 2:
2219 *data = lduw_p(buf);
2220 return MEMTX_OK;
2221 case 4:
2222 *data = ldl_p(buf);
2223 return MEMTX_OK;
2224 case 8:
2225 *data = ldq_p(buf);
2226 return MEMTX_OK;
2227 default:
2228 abort();
2232 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2233 uint64_t value, unsigned len, MemTxAttrs attrs)
2235 subpage_t *subpage = opaque;
2236 uint8_t buf[8];
2238 #if defined(DEBUG_SUBPAGE)
2239 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2240 " value %"PRIx64"\n",
2241 __func__, subpage, len, addr, value);
2242 #endif
2243 switch (len) {
2244 case 1:
2245 stb_p(buf, value);
2246 break;
2247 case 2:
2248 stw_p(buf, value);
2249 break;
2250 case 4:
2251 stl_p(buf, value);
2252 break;
2253 case 8:
2254 stq_p(buf, value);
2255 break;
2256 default:
2257 abort();
2259 return address_space_write(subpage->as, addr + subpage->base,
2260 attrs, buf, len);
2263 static bool subpage_accepts(void *opaque, hwaddr addr,
2264 unsigned len, bool is_write)
2266 subpage_t *subpage = opaque;
2267 #if defined(DEBUG_SUBPAGE)
2268 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2269 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2270 #endif
2272 return address_space_access_valid(subpage->as, addr + subpage->base,
2273 len, is_write);
2276 static const MemoryRegionOps subpage_ops = {
2277 .read_with_attrs = subpage_read,
2278 .write_with_attrs = subpage_write,
2279 .impl.min_access_size = 1,
2280 .impl.max_access_size = 8,
2281 .valid.min_access_size = 1,
2282 .valid.max_access_size = 8,
2283 .valid.accepts = subpage_accepts,
2284 .endianness = DEVICE_NATIVE_ENDIAN,
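/* A subpage_t covers a single guest page that is split across several
 * memory regions (for example a small MMIO window carved out of a RAM
 * page).  sub_section[] holds one section number per byte offset within
 * the page (see the allocation in subpage_init() below), and the handlers
 * above simply bounce the access back into the owning address space at
 * subpage->base + addr, where the dispatch resolves the correct sub-region.
 */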
2287 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2288 uint16_t section)
2290 int idx, eidx;
2292 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2293 return -1;
2294 idx = SUBPAGE_IDX(start);
2295 eidx = SUBPAGE_IDX(end);
2296 #if defined(DEBUG_SUBPAGE)
2297 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2298 __func__, mmio, start, end, idx, eidx, section);
2299 #endif
2300 for (; idx <= eidx; idx++) {
2301 mmio->sub_section[idx] = section;
2304 return 0;
2307 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2309 subpage_t *mmio;
2311 mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2312 mmio->as = as;
2313 mmio->base = base;
2314 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2315 NULL, TARGET_PAGE_SIZE);
2316 mmio->iomem.subpage = true;
2317 #if defined(DEBUG_SUBPAGE)
2318 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2319 mmio, base, TARGET_PAGE_SIZE);
2320 #endif
2321 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2323 return mmio;
2326 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2327 MemoryRegion *mr)
2329 assert(as);
2330 MemoryRegionSection section = {
2331 .address_space = as,
2332 .mr = mr,
2333 .offset_within_address_space = 0,
2334 .offset_within_region = 0,
2335 .size = int128_2_64(),
2338 return phys_section_add(map, &section);
2341 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2343 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2344 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2345 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2346 MemoryRegionSection *sections = d->map.sections;
2348 return sections[index & ~TARGET_PAGE_MASK].mr;
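/* For I/O pages the softmmu TLB stores a section index rather than a
 * MemoryRegion pointer in the low bits of the iotlb entry;
 * iotlb_to_region() converts that index back into a MemoryRegion using the
 * per-CPU dispatch snapshot that tcg_commit() below keeps up to date.
 */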
2351 static void io_mem_init(void)
2353 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2354 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2355 NULL, UINT64_MAX);
2356 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2357 NULL, UINT64_MAX);
2358 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2359 NULL, UINT64_MAX);
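/* The four special regions initialized here receive fixed section numbers
 * from mem_begin() below (PHYS_SECTION_UNASSIGNED, _NOTDIRTY, _ROM and
 * _WATCH), which lets code elsewhere, e.g. the TLB fill path, refer to
 * them by constant index.
 */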
2362 static void mem_begin(MemoryListener *listener)
2364 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2365 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2366 uint16_t n;
2368 n = dummy_section(&d->map, as, &io_mem_unassigned);
2369 assert(n == PHYS_SECTION_UNASSIGNED);
2370 n = dummy_section(&d->map, as, &io_mem_notdirty);
2371 assert(n == PHYS_SECTION_NOTDIRTY);
2372 n = dummy_section(&d->map, as, &io_mem_rom);
2373 assert(n == PHYS_SECTION_ROM);
2374 n = dummy_section(&d->map, as, &io_mem_watch);
2375 assert(n == PHYS_SECTION_WATCH);
2377 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2378 d->as = as;
2379 as->next_dispatch = d;
2382 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2384 phys_sections_free(&d->map);
2385 g_free(d);
2388 static void mem_commit(MemoryListener *listener)
2390 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2391 AddressSpaceDispatch *cur = as->dispatch;
2392 AddressSpaceDispatch *next = as->next_dispatch;
2394 phys_page_compact_all(next, next->map.nodes_nb);
2396 atomic_rcu_set(&as->dispatch, next);
2397 if (cur) {
2398 call_rcu(cur, address_space_dispatch_free, rcu);
2402 static void tcg_commit(MemoryListener *listener)
2404 CPUAddressSpace *cpuas;
2405 AddressSpaceDispatch *d;
2407 /* since each CPU stores ram addresses in its TLB cache, we must
2408 reset the modified entries */
2409 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2410 cpu_reloading_memory_map();
2411 /* The CPU and TLB are protected by the iothread lock.
2412 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2413 * may have split the RCU critical section.
2415 d = atomic_rcu_read(&cpuas->as->dispatch);
2416 atomic_rcu_set(&cpuas->memory_dispatch, d);
2417 tlb_flush(cpuas->cpu, 1);
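/* mem_commit() publishes the freshly built AddressSpaceDispatch with
 * atomic_rcu_set() and defers freeing the old one to call_rcu(), so readers
 * inside an RCU critical section never see a half-built radix tree.
 * tcg_commit() then refreshes the per-CPU memory_dispatch pointer and
 * flushes the TLB, because cached iotlb entries may still reference
 * sections from the old dispatch.
 */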
2420 void address_space_init_dispatch(AddressSpace *as)
2422 as->dispatch = NULL;
2423 as->dispatch_listener = (MemoryListener) {
2424 .begin = mem_begin,
2425 .commit = mem_commit,
2426 .region_add = mem_add,
2427 .region_nop = mem_add,
2428 .priority = 0,
2430 memory_listener_register(&as->dispatch_listener, as);
2433 void address_space_unregister(AddressSpace *as)
2435 memory_listener_unregister(&as->dispatch_listener);
2438 void address_space_destroy_dispatch(AddressSpace *as)
2440 AddressSpaceDispatch *d = as->dispatch;
2442 atomic_rcu_set(&as->dispatch, NULL);
2443 if (d) {
2444 call_rcu(d, address_space_dispatch_free, rcu);
2448 static void memory_map_init(void)
2450 system_memory = g_malloc(sizeof(*system_memory));
2452 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2453 address_space_init(&address_space_memory, system_memory, "memory");
2455 system_io = g_malloc(sizeof(*system_io));
2456 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2457 65536);
2458 address_space_init(&address_space_io, system_io, "I/O");
2461 MemoryRegion *get_system_memory(void)
2463 return system_memory;
2466 MemoryRegion *get_system_io(void)
2468 return system_io;
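/* Boards and devices hang their memory off these two roots.  A minimal,
 * illustrative sketch ("my-ram" and ram_size are placeholders; the calls
 * are the generic memory API, not something defined in this file):
 *
 *     MemoryRegion *ram = g_new(MemoryRegion, 1);
 *     memory_region_init_ram(ram, NULL, "my-ram", ram_size, &error_fatal);
 *     memory_region_add_subregion(get_system_memory(), 0, ram);
 */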
2471 #endif /* !defined(CONFIG_USER_ONLY) */
2473 /* physical memory access (slow version, mainly for debug) */
2474 #if defined(CONFIG_USER_ONLY)
2475 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2476 uint8_t *buf, int len, int is_write)
2478 int l, flags;
2479 target_ulong page;
2480 void *p;
2482 while (len > 0) {
2483 page = addr & TARGET_PAGE_MASK;
2484 l = (page + TARGET_PAGE_SIZE) - addr;
2485 if (l > len)
2486 l = len;
2487 flags = page_get_flags(page);
2488 if (!(flags & PAGE_VALID))
2489 return -1;
2490 if (is_write) {
2491 if (!(flags & PAGE_WRITE))
2492 return -1;
2493 /* XXX: this code should not depend on lock_user */
2494 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2495 return -1;
2496 memcpy(p, buf, l);
2497 unlock_user(p, addr, l);
2498 } else {
2499 if (!(flags & PAGE_READ))
2500 return -1;
2501 /* XXX: this code should not depend on lock_user */
2502 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2503 return -1;
2504 memcpy(buf, p, l);
2505 unlock_user(p, addr, 0);
2507 len -= l;
2508 buf += l;
2509 addr += l;
2511 return 0;
2514 #else
2516 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2517 hwaddr length)
2519 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2520 addr += memory_region_get_ram_addr(mr);
2522 /* No early return if dirty_log_mask is or becomes 0, because
2523 * cpu_physical_memory_set_dirty_range will still call
2524 * xen_modified_memory.
2526 if (dirty_log_mask) {
2527 dirty_log_mask =
2528 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2530 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2531 tb_lock();
2532 tb_invalidate_phys_range(addr, addr + length);
2533 tb_unlock();
2534 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2536 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2539 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2541 unsigned access_size_max = mr->ops->valid.max_access_size;
2543 /* Regions are assumed to support 1-4 byte accesses unless
2544 otherwise specified. */
2545 if (access_size_max == 0) {
2546 access_size_max = 4;
2549 /* Bound the maximum access by the alignment of the address. */
2550 if (!mr->ops->impl.unaligned) {
2551 unsigned align_size_max = addr & -addr;
2552 if (align_size_max != 0 && align_size_max < access_size_max) {
2553 access_size_max = align_size_max;
2557 /* Don't attempt accesses larger than the maximum. */
2558 if (l > access_size_max) {
2559 l = access_size_max;
2561 l = pow2floor(l);
2563 return l;
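/* memory_access_size() shrinks an l-byte access so that it (a) does not
 * exceed the region's valid.max_access_size, (b) stays naturally aligned
 * when the region does not accept unaligned accesses (addr & -addr isolates
 * the lowest set address bit, e.g. an address ending in 0x6 limits l to 2),
 * and (c) is a power of two, so the dispatch loops below can consume the
 * remainder in further iterations.
 */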
2566 static bool prepare_mmio_access(MemoryRegion *mr)
2568 bool unlocked = !qemu_mutex_iothread_locked();
2569 bool release_lock = false;
2571 if (unlocked && mr->global_locking) {
2572 qemu_mutex_lock_iothread();
2573 unlocked = false;
2574 release_lock = true;
2576 if (mr->flush_coalesced_mmio) {
2577 if (unlocked) {
2578 qemu_mutex_lock_iothread();
2580 qemu_flush_coalesced_mmio_buffer();
2581 if (unlocked) {
2582 qemu_mutex_unlock_iothread();
2586 return release_lock;
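/* prepare_mmio_access() takes the global "iothread" mutex before
 * dispatching into device code, unless the region opted out of it via
 * mr->global_locking, and flushes any batched coalesced MMIO so the device
 * sees writes in order.  The caller drops the lock again when it is done
 * with the region, guided by the returned release_lock flag.
 */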
2589 /* Called within RCU critical section. */
2590 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2591 MemTxAttrs attrs,
2592 const uint8_t *buf,
2593 int len, hwaddr addr1,
2594 hwaddr l, MemoryRegion *mr)
2596 uint8_t *ptr;
2597 uint64_t val;
2598 MemTxResult result = MEMTX_OK;
2599 bool release_lock = false;
2601 for (;;) {
2602 if (!memory_access_is_direct(mr, true)) {
2603 release_lock |= prepare_mmio_access(mr);
2604 l = memory_access_size(mr, l, addr1);
2605 /* XXX: could force current_cpu to NULL to avoid
2606 potential bugs */
2607 switch (l) {
2608 case 8:
2609 /* 64 bit write access */
2610 val = ldq_p(buf);
2611 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2612 attrs);
2613 break;
2614 case 4:
2615 /* 32 bit write access */
2616 val = ldl_p(buf);
2617 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2618 attrs);
2619 break;
2620 case 2:
2621 /* 16 bit write access */
2622 val = lduw_p(buf);
2623 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2624 attrs);
2625 break;
2626 case 1:
2627 /* 8 bit write access */
2628 val = ldub_p(buf);
2629 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2630 attrs);
2631 break;
2632 default:
2633 abort();
2635 } else {
2636 /* RAM case */
2637 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2638 memcpy(ptr, buf, l);
2639 invalidate_and_set_dirty(mr, addr1, l);
2642 if (release_lock) {
2643 qemu_mutex_unlock_iothread();
2644 release_lock = false;
2647 len -= l;
2648 buf += l;
2649 addr += l;
2651 if (!len) {
2652 break;
2655 l = len;
2656 mr = address_space_translate(as, addr, &addr1, &l, true);
2659 return result;
2662 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2663 const uint8_t *buf, int len)
2665 hwaddr l;
2666 hwaddr addr1;
2667 MemoryRegion *mr;
2668 MemTxResult result = MEMTX_OK;
2670 if (len > 0) {
2671 rcu_read_lock();
2672 l = len;
2673 mr = address_space_translate(as, addr, &addr1, &l, true);
2674 result = address_space_write_continue(as, addr, attrs, buf, len,
2675 addr1, l, mr);
2676 rcu_read_unlock();
2679 return result;
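/* A minimal caller-side sketch (GUEST_ADDR is a placeholder for a guest
 * physical address):
 *
 *     uint8_t buf[4] = { 0xde, 0xad, 0xbe, 0xef };
 *     MemTxResult r = address_space_write(&address_space_memory, GUEST_ADDR,
 *                                         MEMTXATTRS_UNSPECIFIED, buf,
 *                                         sizeof(buf));
 *     if (r != MEMTX_OK) {
 *         (part of the access hit a device or bus that signalled an error)
 *     }
 */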
2682 /* Called within RCU critical section. */
2683 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2684 MemTxAttrs attrs, uint8_t *buf,
2685 int len, hwaddr addr1, hwaddr l,
2686 MemoryRegion *mr)
2688 uint8_t *ptr;
2689 uint64_t val;
2690 MemTxResult result = MEMTX_OK;
2691 bool release_lock = false;
2693 for (;;) {
2694 if (!memory_access_is_direct(mr, false)) {
2695 /* I/O case */
2696 release_lock |= prepare_mmio_access(mr);
2697 l = memory_access_size(mr, l, addr1);
2698 switch (l) {
2699 case 8:
2700 /* 64 bit read access */
2701 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2702 attrs);
2703 stq_p(buf, val);
2704 break;
2705 case 4:
2706 /* 32 bit read access */
2707 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2708 attrs);
2709 stl_p(buf, val);
2710 break;
2711 case 2:
2712 /* 16 bit read access */
2713 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2714 attrs);
2715 stw_p(buf, val);
2716 break;
2717 case 1:
2718 /* 8 bit read access */
2719 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2720 attrs);
2721 stb_p(buf, val);
2722 break;
2723 default:
2724 abort();
2726 } else {
2727 /* RAM case */
2728 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2729 memcpy(buf, ptr, l);
2732 if (release_lock) {
2733 qemu_mutex_unlock_iothread();
2734 release_lock = false;
2737 len -= l;
2738 buf += l;
2739 addr += l;
2741 if (!len) {
2742 break;
2745 l = len;
2746 mr = address_space_translate(as, addr, &addr1, &l, false);
2749 return result;
2752 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2753 MemTxAttrs attrs, uint8_t *buf, int len)
2755 hwaddr l;
2756 hwaddr addr1;
2757 MemoryRegion *mr;
2758 MemTxResult result = MEMTX_OK;
2760 if (len > 0) {
2761 rcu_read_lock();
2762 l = len;
2763 mr = address_space_translate(as, addr, &addr1, &l, false);
2764 result = address_space_read_continue(as, addr, attrs, buf, len,
2765 addr1, l, mr);
2766 rcu_read_unlock();
2769 return result;
2772 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2773 uint8_t *buf, int len, bool is_write)
2775 if (is_write) {
2776 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2777 } else {
2778 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2782 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2783 int len, int is_write)
2785 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2786 buf, len, is_write);
2789 enum write_rom_type {
2790 WRITE_DATA,
2791 FLUSH_CACHE,
2794 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2795 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2797 hwaddr l;
2798 uint8_t *ptr;
2799 hwaddr addr1;
2800 MemoryRegion *mr;
2802 rcu_read_lock();
2803 while (len > 0) {
2804 l = len;
2805 mr = address_space_translate(as, addr, &addr1, &l, true);
2807 if (!(memory_region_is_ram(mr) ||
2808 memory_region_is_romd(mr))) {
2809 l = memory_access_size(mr, l, addr1);
2810 } else {
2811 /* ROM/RAM case */
2812 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2813 switch (type) {
2814 case WRITE_DATA:
2815 memcpy(ptr, buf, l);
2816 invalidate_and_set_dirty(mr, addr1, l);
2817 break;
2818 case FLUSH_CACHE:
2819 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2820 break;
2823 len -= l;
2824 buf += l;
2825 addr += l;
2827 rcu_read_unlock();
2830 /* used for ROM loading: can write in RAM and ROM */
2831 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2832 const uint8_t *buf, int len)
2834 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2837 void cpu_flush_icache_range(hwaddr start, int len)
2840 * This function should do the same thing as an icache flush that was
2841 * triggered from within the guest. For TCG we are always cache coherent,
2842 * so there is no need to flush anything. For KVM / Xen we need to flush
2843 * the host's instruction cache at least.
2845 if (tcg_enabled()) {
2846 return;
2849 cpu_physical_memory_write_rom_internal(&address_space_memory,
2850 start, NULL, len, FLUSH_CACHE);
2853 typedef struct {
2854 MemoryRegion *mr;
2855 void *buffer;
2856 hwaddr addr;
2857 hwaddr len;
2858 bool in_use;
2859 } BounceBuffer;
2861 static BounceBuffer bounce;
2863 typedef struct MapClient {
2864 QEMUBH *bh;
2865 QLIST_ENTRY(MapClient) link;
2866 } MapClient;
2868 QemuMutex map_client_list_lock;
2869 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2870 = QLIST_HEAD_INITIALIZER(map_client_list);
2872 static void cpu_unregister_map_client_do(MapClient *client)
2874 QLIST_REMOVE(client, link);
2875 g_free(client);
2878 static void cpu_notify_map_clients_locked(void)
2880 MapClient *client;
2882 while (!QLIST_EMPTY(&map_client_list)) {
2883 client = QLIST_FIRST(&map_client_list);
2884 qemu_bh_schedule(client->bh);
2885 cpu_unregister_map_client_do(client);
2889 void cpu_register_map_client(QEMUBH *bh)
2891 MapClient *client = g_malloc(sizeof(*client));
2893 qemu_mutex_lock(&map_client_list_lock);
2894 client->bh = bh;
2895 QLIST_INSERT_HEAD(&map_client_list, client, link);
2896 if (!atomic_read(&bounce.in_use)) {
2897 cpu_notify_map_clients_locked();
2899 qemu_mutex_unlock(&map_client_list_lock);
2902 void cpu_exec_init_all(void)
2904 qemu_mutex_init(&ram_list.mutex);
2905 /* The data structures we set up here depend on knowing the page size,
2906 * so no more changes can be made after this point.
2907 * In an ideal world, nothing we did before we had finished the
2908 * machine setup would care about the target page size, and we could
2909 * do this much later, rather than requiring board models to state
2910 * up front what their requirements are.
2912 finalize_target_page_bits();
2913 io_mem_init();
2914 memory_map_init();
2915 qemu_mutex_init(&map_client_list_lock);
2918 void cpu_unregister_map_client(QEMUBH *bh)
2920 MapClient *client;
2922 qemu_mutex_lock(&map_client_list_lock);
2923 QLIST_FOREACH(client, &map_client_list, link) {
2924 if (client->bh == bh) {
2925 cpu_unregister_map_client_do(client);
2926 break;
2929 qemu_mutex_unlock(&map_client_list_lock);
2932 static void cpu_notify_map_clients(void)
2934 qemu_mutex_lock(&map_client_list_lock);
2935 cpu_notify_map_clients_locked();
2936 qemu_mutex_unlock(&map_client_list_lock);
2939 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2941 MemoryRegion *mr;
2942 hwaddr l, xlat;
2944 rcu_read_lock();
2945 while (len > 0) {
2946 l = len;
2947 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2948 if (!memory_access_is_direct(mr, is_write)) {
2949 l = memory_access_size(mr, l, addr);
2950 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2951 return false;
2955 len -= l;
2956 addr += l;
2958 rcu_read_unlock();
2959 return true;
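/* Note that the "return false" path above leaves the function without
 * reaching rcu_read_unlock(), i.e. the check can fail while the read-side
 * critical section opened at the top is still held.
 */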
2962 /* Map a physical memory region into a host virtual address.
2963 * May map a subset of the requested range, given by and returned in *plen.
2964 * May return NULL if resources needed to perform the mapping are exhausted.
2965 * Use only for reads OR writes - not for read-modify-write operations.
2966 * Use cpu_register_map_client() to know when retrying the map operation is
2967 * likely to succeed.
2969 void *address_space_map(AddressSpace *as,
2970 hwaddr addr,
2971 hwaddr *plen,
2972 bool is_write)
2974 hwaddr len = *plen;
2975 hwaddr done = 0;
2976 hwaddr l, xlat, base;
2977 MemoryRegion *mr, *this_mr;
2978 void *ptr;
2980 if (len == 0) {
2981 return NULL;
2984 l = len;
2985 rcu_read_lock();
2986 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2988 if (!memory_access_is_direct(mr, is_write)) {
2989 if (atomic_xchg(&bounce.in_use, true)) {
2990 rcu_read_unlock();
2991 return NULL;
2993 /* Avoid unbounded allocations */
2994 l = MIN(l, TARGET_PAGE_SIZE);
2995 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2996 bounce.addr = addr;
2997 bounce.len = l;
2999 memory_region_ref(mr);
3000 bounce.mr = mr;
3001 if (!is_write) {
3002 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
3003 bounce.buffer, l);
3006 rcu_read_unlock();
3007 *plen = l;
3008 return bounce.buffer;
3011 base = xlat;
3013 for (;;) {
3014 len -= l;
3015 addr += l;
3016 done += l;
3017 if (len == 0) {
3018 break;
3021 l = len;
3022 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3023 if (this_mr != mr || xlat != base + done) {
3024 break;
3028 memory_region_ref(mr);
3029 *plen = done;
3030 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
3031 rcu_read_unlock();
3033 return ptr;
3036 /* Unmaps a memory region previously mapped by address_space_map().
3037 * Will also mark the memory as dirty if is_write == 1. access_len gives
3038 * the amount of memory that was actually read or written by the caller.
3040 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3041 int is_write, hwaddr access_len)
3043 if (buffer != bounce.buffer) {
3044 MemoryRegion *mr;
3045 ram_addr_t addr1;
3047 mr = memory_region_from_host(buffer, &addr1);
3048 assert(mr != NULL);
3049 if (is_write) {
3050 invalidate_and_set_dirty(mr, addr1, access_len);
3052 if (xen_enabled()) {
3053 xen_invalidate_map_cache_entry(buffer);
3055 memory_region_unref(mr);
3056 return;
3058 if (is_write) {
3059 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3060 bounce.buffer, access_len);
3062 qemu_vfree(bounce.buffer);
3063 bounce.buffer = NULL;
3064 memory_region_unref(bounce.mr);
3065 atomic_mb_set(&bounce.in_use, false);
3066 cpu_notify_map_clients();
3069 void *cpu_physical_memory_map(hwaddr addr,
3070 hwaddr *plen,
3071 int is_write)
3073 return address_space_map(&address_space_memory, addr, plen, is_write);
3076 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3077 int is_write, hwaddr access_len)
3079 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
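/* A typical zero-copy DMA pattern built on the two wrappers above
 * (illustrative sketch; dma_addr and dma_len are placeholders):
 *
 *     hwaddr plen = dma_len;
 *     void *p = address_space_map(&address_space_memory, dma_addr,
 *                                 &plen, true);
 *     if (p) {
 *         memset(p, 0, plen);   (touch at most plen bytes, which may be
 *                                less than dma_len)
 *         address_space_unmap(&address_space_memory, p, plen, true, plen);
 *     } else {
 *         (resources exhausted, e.g. the bounce buffer is in use: register
 *          a map client with cpu_register_map_client() and retry later)
 *     }
 */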
3082 /* warning: addr must be aligned */
3083 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3084 MemTxAttrs attrs,
3085 MemTxResult *result,
3086 enum device_endian endian)
3088 uint8_t *ptr;
3089 uint64_t val;
3090 MemoryRegion *mr;
3091 hwaddr l = 4;
3092 hwaddr addr1;
3093 MemTxResult r;
3094 bool release_lock = false;
3096 rcu_read_lock();
3097 mr = address_space_translate(as, addr, &addr1, &l, false);
3098 if (l < 4 || !memory_access_is_direct(mr, false)) {
3099 release_lock |= prepare_mmio_access(mr);
3101 /* I/O case */
3102 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3103 #if defined(TARGET_WORDS_BIGENDIAN)
3104 if (endian == DEVICE_LITTLE_ENDIAN) {
3105 val = bswap32(val);
3107 #else
3108 if (endian == DEVICE_BIG_ENDIAN) {
3109 val = bswap32(val);
3111 #endif
3112 } else {
3113 /* RAM case */
3114 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3115 switch (endian) {
3116 case DEVICE_LITTLE_ENDIAN:
3117 val = ldl_le_p(ptr);
3118 break;
3119 case DEVICE_BIG_ENDIAN:
3120 val = ldl_be_p(ptr);
3121 break;
3122 default:
3123 val = ldl_p(ptr);
3124 break;
3126 r = MEMTX_OK;
3128 if (result) {
3129 *result = r;
3131 if (release_lock) {
3132 qemu_mutex_unlock_iothread();
3134 rcu_read_unlock();
3135 return val;
3138 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3139 MemTxAttrs attrs, MemTxResult *result)
3141 return address_space_ldl_internal(as, addr, attrs, result,
3142 DEVICE_NATIVE_ENDIAN);
3145 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3146 MemTxAttrs attrs, MemTxResult *result)
3148 return address_space_ldl_internal(as, addr, attrs, result,
3149 DEVICE_LITTLE_ENDIAN);
3152 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3153 MemTxAttrs attrs, MemTxResult *result)
3155 return address_space_ldl_internal(as, addr, attrs, result,
3156 DEVICE_BIG_ENDIAN);
3159 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3161 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3164 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3166 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3169 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3171 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3174 /* warning: addr must be aligned */
3175 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3176 MemTxAttrs attrs,
3177 MemTxResult *result,
3178 enum device_endian endian)
3180 uint8_t *ptr;
3181 uint64_t val;
3182 MemoryRegion *mr;
3183 hwaddr l = 8;
3184 hwaddr addr1;
3185 MemTxResult r;
3186 bool release_lock = false;
3188 rcu_read_lock();
3189 mr = address_space_translate(as, addr, &addr1, &l,
3190 false);
3191 if (l < 8 || !memory_access_is_direct(mr, false)) {
3192 release_lock |= prepare_mmio_access(mr);
3194 /* I/O case */
3195 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3196 #if defined(TARGET_WORDS_BIGENDIAN)
3197 if (endian == DEVICE_LITTLE_ENDIAN) {
3198 val = bswap64(val);
3200 #else
3201 if (endian == DEVICE_BIG_ENDIAN) {
3202 val = bswap64(val);
3204 #endif
3205 } else {
3206 /* RAM case */
3207 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3208 switch (endian) {
3209 case DEVICE_LITTLE_ENDIAN:
3210 val = ldq_le_p(ptr);
3211 break;
3212 case DEVICE_BIG_ENDIAN:
3213 val = ldq_be_p(ptr);
3214 break;
3215 default:
3216 val = ldq_p(ptr);
3217 break;
3219 r = MEMTX_OK;
3221 if (result) {
3222 *result = r;
3224 if (release_lock) {
3225 qemu_mutex_unlock_iothread();
3227 rcu_read_unlock();
3228 return val;
3231 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3232 MemTxAttrs attrs, MemTxResult *result)
3234 return address_space_ldq_internal(as, addr, attrs, result,
3235 DEVICE_NATIVE_ENDIAN);
3238 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3239 MemTxAttrs attrs, MemTxResult *result)
3241 return address_space_ldq_internal(as, addr, attrs, result,
3242 DEVICE_LITTLE_ENDIAN);
3245 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3246 MemTxAttrs attrs, MemTxResult *result)
3248 return address_space_ldq_internal(as, addr, attrs, result,
3249 DEVICE_BIG_ENDIAN);
3252 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3254 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3257 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3259 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3262 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3264 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3267 /* XXX: optimize */
3268 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3269 MemTxAttrs attrs, MemTxResult *result)
3271 uint8_t val;
3272 MemTxResult r;
3274 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3275 if (result) {
3276 *result = r;
3278 return val;
3281 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3283 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3286 /* warning: addr must be aligned */
3287 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3288 hwaddr addr,
3289 MemTxAttrs attrs,
3290 MemTxResult *result,
3291 enum device_endian endian)
3293 uint8_t *ptr;
3294 uint64_t val;
3295 MemoryRegion *mr;
3296 hwaddr l = 2;
3297 hwaddr addr1;
3298 MemTxResult r;
3299 bool release_lock = false;
3301 rcu_read_lock();
3302 mr = address_space_translate(as, addr, &addr1, &l,
3303 false);
3304 if (l < 2 || !memory_access_is_direct(mr, false)) {
3305 release_lock |= prepare_mmio_access(mr);
3307 /* I/O case */
3308 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3309 #if defined(TARGET_WORDS_BIGENDIAN)
3310 if (endian == DEVICE_LITTLE_ENDIAN) {
3311 val = bswap16(val);
3313 #else
3314 if (endian == DEVICE_BIG_ENDIAN) {
3315 val = bswap16(val);
3317 #endif
3318 } else {
3319 /* RAM case */
3320 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3321 switch (endian) {
3322 case DEVICE_LITTLE_ENDIAN:
3323 val = lduw_le_p(ptr);
3324 break;
3325 case DEVICE_BIG_ENDIAN:
3326 val = lduw_be_p(ptr);
3327 break;
3328 default:
3329 val = lduw_p(ptr);
3330 break;
3332 r = MEMTX_OK;
3334 if (result) {
3335 *result = r;
3337 if (release_lock) {
3338 qemu_mutex_unlock_iothread();
3340 rcu_read_unlock();
3341 return val;
3344 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3345 MemTxAttrs attrs, MemTxResult *result)
3347 return address_space_lduw_internal(as, addr, attrs, result,
3348 DEVICE_NATIVE_ENDIAN);
3351 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3352 MemTxAttrs attrs, MemTxResult *result)
3354 return address_space_lduw_internal(as, addr, attrs, result,
3355 DEVICE_LITTLE_ENDIAN);
3358 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3359 MemTxAttrs attrs, MemTxResult *result)
3361 return address_space_lduw_internal(as, addr, attrs, result,
3362 DEVICE_BIG_ENDIAN);
3365 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3367 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3370 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3372 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3375 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3377 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3380 /* warning: addr must be aligned. The ram page is not marked as dirty
3381 and the code inside is not invalidated. It is useful if the dirty
3382 bits are used to track modified PTEs */
3383 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3384 MemTxAttrs attrs, MemTxResult *result)
3386 uint8_t *ptr;
3387 MemoryRegion *mr;
3388 hwaddr l = 4;
3389 hwaddr addr1;
3390 MemTxResult r;
3391 uint8_t dirty_log_mask;
3392 bool release_lock = false;
3394 rcu_read_lock();
3395 mr = address_space_translate(as, addr, &addr1, &l,
3396 true);
3397 if (l < 4 || !memory_access_is_direct(mr, true)) {
3398 release_lock |= prepare_mmio_access(mr);
3400 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3401 } else {
3402 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3403 stl_p(ptr, val);
3405 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3406 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3407 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3408 4, dirty_log_mask);
3409 r = MEMTX_OK;
3411 if (result) {
3412 *result = r;
3414 if (release_lock) {
3415 qemu_mutex_unlock_iothread();
3417 rcu_read_unlock();
3420 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3422 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
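/* The "notdirty" store above is intended for guest pages that hold page
 * table entries rather than code: DIRTY_MEMORY_CODE is masked out before
 * the dirty-range update, so no translated code gets invalidated, while
 * the VGA and migration clients still see the page as written.
 */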
3425 /* warning: addr must be aligned */
3426 static inline void address_space_stl_internal(AddressSpace *as,
3427 hwaddr addr, uint32_t val,
3428 MemTxAttrs attrs,
3429 MemTxResult *result,
3430 enum device_endian endian)
3432 uint8_t *ptr;
3433 MemoryRegion *mr;
3434 hwaddr l = 4;
3435 hwaddr addr1;
3436 MemTxResult r;
3437 bool release_lock = false;
3439 rcu_read_lock();
3440 mr = address_space_translate(as, addr, &addr1, &l,
3441 true);
3442 if (l < 4 || !memory_access_is_direct(mr, true)) {
3443 release_lock |= prepare_mmio_access(mr);
3445 #if defined(TARGET_WORDS_BIGENDIAN)
3446 if (endian == DEVICE_LITTLE_ENDIAN) {
3447 val = bswap32(val);
3449 #else
3450 if (endian == DEVICE_BIG_ENDIAN) {
3451 val = bswap32(val);
3453 #endif
3454 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3455 } else {
3456 /* RAM case */
3457 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3458 switch (endian) {
3459 case DEVICE_LITTLE_ENDIAN:
3460 stl_le_p(ptr, val);
3461 break;
3462 case DEVICE_BIG_ENDIAN:
3463 stl_be_p(ptr, val);
3464 break;
3465 default:
3466 stl_p(ptr, val);
3467 break;
3469 invalidate_and_set_dirty(mr, addr1, 4);
3470 r = MEMTX_OK;
3472 if (result) {
3473 *result = r;
3475 if (release_lock) {
3476 qemu_mutex_unlock_iothread();
3478 rcu_read_unlock();
3481 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3482 MemTxAttrs attrs, MemTxResult *result)
3484 address_space_stl_internal(as, addr, val, attrs, result,
3485 DEVICE_NATIVE_ENDIAN);
3488 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3489 MemTxAttrs attrs, MemTxResult *result)
3491 address_space_stl_internal(as, addr, val, attrs, result,
3492 DEVICE_LITTLE_ENDIAN);
3495 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3496 MemTxAttrs attrs, MemTxResult *result)
3498 address_space_stl_internal(as, addr, val, attrs, result,
3499 DEVICE_BIG_ENDIAN);
3502 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3504 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3507 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3509 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3512 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3514 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3517 /* XXX: optimize */
3518 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3519 MemTxAttrs attrs, MemTxResult *result)
3521 uint8_t v = val;
3522 MemTxResult r;
3524 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3525 if (result) {
3526 *result = r;
3530 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3532 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3535 /* warning: addr must be aligned */
3536 static inline void address_space_stw_internal(AddressSpace *as,
3537 hwaddr addr, uint32_t val,
3538 MemTxAttrs attrs,
3539 MemTxResult *result,
3540 enum device_endian endian)
3542 uint8_t *ptr;
3543 MemoryRegion *mr;
3544 hwaddr l = 2;
3545 hwaddr addr1;
3546 MemTxResult r;
3547 bool release_lock = false;
3549 rcu_read_lock();
3550 mr = address_space_translate(as, addr, &addr1, &l, true);
3551 if (l < 2 || !memory_access_is_direct(mr, true)) {
3552 release_lock |= prepare_mmio_access(mr);
3554 #if defined(TARGET_WORDS_BIGENDIAN)
3555 if (endian == DEVICE_LITTLE_ENDIAN) {
3556 val = bswap16(val);
3558 #else
3559 if (endian == DEVICE_BIG_ENDIAN) {
3560 val = bswap16(val);
3562 #endif
3563 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3564 } else {
3565 /* RAM case */
3566 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3567 switch (endian) {
3568 case DEVICE_LITTLE_ENDIAN:
3569 stw_le_p(ptr, val);
3570 break;
3571 case DEVICE_BIG_ENDIAN:
3572 stw_be_p(ptr, val);
3573 break;
3574 default:
3575 stw_p(ptr, val);
3576 break;
3578 invalidate_and_set_dirty(mr, addr1, 2);
3579 r = MEMTX_OK;
3581 if (result) {
3582 *result = r;
3584 if (release_lock) {
3585 qemu_mutex_unlock_iothread();
3587 rcu_read_unlock();
3590 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3591 MemTxAttrs attrs, MemTxResult *result)
3593 address_space_stw_internal(as, addr, val, attrs, result,
3594 DEVICE_NATIVE_ENDIAN);
3597 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3598 MemTxAttrs attrs, MemTxResult *result)
3600 address_space_stw_internal(as, addr, val, attrs, result,
3601 DEVICE_LITTLE_ENDIAN);
3604 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3605 MemTxAttrs attrs, MemTxResult *result)
3607 address_space_stw_internal(as, addr, val, attrs, result,
3608 DEVICE_BIG_ENDIAN);
3611 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3613 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3616 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3618 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3621 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3623 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3626 /* XXX: optimize */
3627 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3628 MemTxAttrs attrs, MemTxResult *result)
3630 MemTxResult r;
3631 val = tswap64(val);
3632 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3633 if (result) {
3634 *result = r;
3638 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3639 MemTxAttrs attrs, MemTxResult *result)
3641 MemTxResult r;
3642 val = cpu_to_le64(val);
3643 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3644 if (result) {
3645 *result = r;
3648 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3649 MemTxAttrs attrs, MemTxResult *result)
3651 MemTxResult r;
3652 val = cpu_to_be64(val);
3653 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3654 if (result) {
3655 *result = r;
3659 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3661 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3664 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3666 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3669 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3671 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3674 /* virtual memory access for debug (includes writing to ROM) */
3675 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3676 uint8_t *buf, int len, int is_write)
3678 int l;
3679 hwaddr phys_addr;
3680 target_ulong page;
3682 while (len > 0) {
3683 int asidx;
3684 MemTxAttrs attrs;
3686 page = addr & TARGET_PAGE_MASK;
3687 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3688 asidx = cpu_asidx_from_attrs(cpu, attrs);
3689 /* if no physical page is mapped, return an error */
3690 if (phys_addr == -1)
3691 return -1;
3692 l = (page + TARGET_PAGE_SIZE) - addr;
3693 if (l > len)
3694 l = len;
3695 phys_addr += (addr & ~TARGET_PAGE_MASK);
3696 if (is_write) {
3697 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3698 phys_addr, buf, l);
3699 } else {
3700 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3701 MEMTXATTRS_UNSPECIFIED,
3702 buf, l, 0);
3704 len -= l;
3705 buf += l;
3706 addr += l;
3708 return 0;
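/* The debug accessor above works one guest page at a time: each virtual
 * page is translated with cpu_get_phys_page_attrs_debug() and the address
 * space matching the returned attributes is used.  Writes go through
 * cpu_physical_memory_write_rom() so a debugger (e.g. the gdbstub) can
 * patch breakpoints even into ROM-backed memory.
 */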
3712 * Allows code that needs to deal with migration bitmaps etc. to still be built
3713 * target-independent.
3715 size_t qemu_target_page_bits(void)
3717 return TARGET_PAGE_BITS;
3720 #endif
3723 * A helper function for the _utterly broken_ virtio device model to find out if
3724 * it's running on a big endian machine. Don't do this at home kids!
3726 bool target_words_bigendian(void);
3727 bool target_words_bigendian(void)
3729 #if defined(TARGET_WORDS_BIGENDIAN)
3730 return true;
3731 #else
3732 return false;
3733 #endif
3736 #ifndef CONFIG_USER_ONLY
3737 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3739 MemoryRegion *mr;
3740 hwaddr l = 1;
3741 bool res;
3743 rcu_read_lock();
3744 mr = address_space_translate(&address_space_memory,
3745 phys_addr, &phys_addr, &l, false);
3747 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3748 rcu_read_unlock();
3749 return res;
3752 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3754 RAMBlock *block;
3755 int ret = 0;
3757 rcu_read_lock();
3758 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3759 ret = func(block->idstr, block->host, block->offset,
3760 block->used_length, opaque);
3761 if (ret) {
3762 break;
3765 rcu_read_unlock();
3766 return ret;
3768 #endif