exec.c

   1 /*
   2  *  Virtual page mapping
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20 #include "qapi/error.h"
  21 #ifndef _WIN32
  22 #endif
  23
  24 #include "qemu/cutils.h"
  25 #include "cpu.h"
  26 #include "exec/exec-all.h"
  27 #include "tcg.h"
  28 #include "hw/qdev-core.h"
  29 #if !defined(CONFIG_USER_ONLY)
  30 #include "hw/boards.h"
  31 #include "hw/xen/xen.h"
  32 #endif
  33 #include "sysemu/kvm.h"
  34 #include "sysemu/sysemu.h"
  35 #include "qemu/timer.h"
  36 #include "qemu/config-file.h"
  37 #include "qemu/error-report.h"
  38 #if defined(CONFIG_USER_ONLY)
  39 #include "qemu.h"
  40 #else /* !CONFIG_USER_ONLY */
  41 #include "hw/hw.h"
  42 #include "exec/memory.h"
  43 #include "exec/ioport.h"
  44 #include "sysemu/dma.h"
  45 #include "exec/address-spaces.h"
  46 #include "sysemu/xen-mapcache.h"
  47 #include "trace.h"
  48 #endif
  49 #include "exec/cpu-all.h"
  50 #include "qemu/rcu_queue.h"
  51 #include "qemu/main-loop.h"
  52 #include "translate-all.h"
  53 #include "sysemu/replay.h"
  54
  55 #include "exec/memory-internal.h"
  56 #include "exec/ram_addr.h"
  57 #include "exec/log.h"
  58
  59 #include "migration/vmstate.h"
  60
  61 #include "qemu/range.h"
  62 #ifndef _WIN32
  63 #include "qemu/mmap-alloc.h"
  64 #endif
  65
  66 //#define DEBUG_SUBPAGE
  67
  68 #if !defined(CONFIG_USER_ONLY)
   69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
   70  * are protected by the ramlist lock.
   71  */
   72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
   73
      /* Root memory regions.  system_memory is the default target of each
       * CPU's "memory" link (see cpu_exec_initfn()); system_io is presumably
       * its I/O-port counterpart -- confirm where both are created.
       */
   74 static MemoryRegion *system_memory;
   75 static MemoryRegion *system_io;
   76
      /* Globally visible address spaces for I/O and for memory. */
   77 AddressSpace address_space_io;
   78 AddressSpace address_space_memory;
   79
      /* Special-purpose regions.  memory_region_is_unassigned() tests for
       * io_mem_rom and io_mem_notdirty; address_space_translate() returns
       * io_mem_unassigned when an IOMMU entry denies the access.
       */
   80 MemoryRegion io_mem_rom, io_mem_notdirty;
   81 static MemoryRegion io_mem_unassigned;
   82
      /* RAM block flag bits. */
   83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
   84 #define RAM_PREALLOC   (1 << 0)
   85
   86 /* RAM is mmap-ed with MAP_SHARED */
   87 #define RAM_SHARED     (1 << 1)
   88
   89 /* Only a portion of RAM (used_length) is actually used, and migrated.
   90  * This used_length size can change across reboots.
   91  */
   92 #define RAM_RESIZEABLE (1 << 2)
  93
  94 #endif
  95
      /* Run-time selectable target page size.  target_page_bits stays 0 until
       * set_preferred_target_page_bits() picks a value; once
       * target_page_bits_decided is true the size can no longer be reduced.
       */
   96 #ifdef TARGET_PAGE_BITS_VARY
   97 int target_page_bits;
   98 bool target_page_bits_decided;
   99 #endif
  100
      /* Global CPU tail queue, initialised empty. */
  101 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
  102 /* Current CPU in the current thread (thread-local).  It is only valid
  103    inside cpu_exec(). */
  104 __thread CPUState *current_cpu;
  105 /* 0 = Do not count executed instructions.
  106    1 = Precise instruction counting.
  107    2 = Adaptive rate instruction counting.  */
  108 int use_icount;
 109
 110 bool set_preferred_target_page_bits(int bits)
 111 {
 112     /* The target page size is the lowest common denominator for all
 113      * the CPUs in the system, so we can only make it smaller, never
 114      * larger. And we can't make it smaller once we've committed to
 115      * a particular size.
 116      */
 117 #ifdef TARGET_PAGE_BITS_VARY
 118     assert(bits >= TARGET_PAGE_BITS_MIN);
 119     if (target_page_bits == 0 || target_page_bits > bits) {
 120         if (target_page_bits_decided) {
 121             return false;
 122         }
 123         target_page_bits = bits;
 124     }
 125 #endif
 126     return true;
 127 }
 128
 129 #if !defined(CONFIG_USER_ONLY)
 130
 131 static void finalize_target_page_bits(void)
 132 {
 133 #ifdef TARGET_PAGE_BITS_VARY
 134     if (target_page_bits == 0) {
 135         target_page_bits = TARGET_PAGE_BITS_MIN;
 136     }
 137     target_page_bits_decided = true;
 138 #endif
 139 }
 140
  141 typedef struct PhysPageEntry PhysPageEntry;
  142
      /* One 32-bit slot of the physical page radix tree. */
  143 struct PhysPageEntry {
  144     /* How many levels to skip to reach the next node (each level decodes
           * P_L2_BITS bits of the page index). 0 for a leaf. */
  145     uint32_t skip : 6;
  146      /* index into the sections array (!skip) or the nodes array (skip) */
  147     uint32_t ptr : 26;
  148 };
  149
      /* All-ones value of the 26-bit ptr field: "no node allocated here". */
  150 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
  151
  152 /* Size of the L2 (and L3, etc) page tables.  */
  153 #define ADDR_SPACE_BITS 64
  154
      /* Each tree node decodes P_L2_BITS index bits, i.e. has P_L2_SIZE slots. */
  155 #define P_L2_BITS 9
  156 #define P_L2_SIZE (1 << P_L2_BITS)
  157
      /* Number of levels needed to cover ADDR_SPACE_BITS - TARGET_PAGE_BITS
       * bits of page index (division rounded up). */
  158 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
  159
      /* A tree node: a table of P_L2_SIZE entries. */
  160 typedef PhysPageEntry Node[P_L2_SIZE];
 161
      /* Backing storage for a radix tree: a growable array of nodes plus an
       * array of sections that leaf entries index into.
       */
  162 typedef struct PhysPageMap {
  163     struct rcu_head rcu;
  164
          /* presumably used/allocated counts for sections[], mirroring the
           * nodes_nb pair below -- the code managing them is not shown here */
  165     unsigned sections_nb;
  166     unsigned sections_nb_alloc;
          /* nodes in use (bumped by phys_map_node_alloc()) */
  167     unsigned nodes_nb;
          /* capacity of nodes[] (grown by phys_map_node_reserve()) */
  168     unsigned nodes_nb_alloc;
  169     Node *nodes;
  170     MemoryRegionSection *sections;
  171 } PhysPageMap;
 172
      /* Lookup state for one AddressSpace: the radix tree root, its storage,
       * and a one-entry cache of the most recently used section.
       */
  173 struct AddressSpaceDispatch {
  174     struct rcu_head rcu;
  175
          /* Last section returned by address_space_lookup_region(); accessed
           * there with atomic_read()/atomic_set(). */
  176     MemoryRegionSection *mru_section;
  177     /* This is a multi-level map on the physical address space.
  178      * The bottom level has pointers to MemoryRegionSections.
  179      */
  180     PhysPageEntry phys_map;
  181     PhysPageMap map;
  182     AddressSpace *as;
  183 };
 184
      /* Offset of addr within its target page; indexes subpage_t.sub_section. */
  185 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
      /* A page whose contents are split between several sections.
       * address_space_lookup_region() recovers it from its iomem member and
       * maps each in-page offset to a section index through sub_section[].
       */
  186 typedef struct subpage_t {
  187     MemoryRegion iomem;
  188     AddressSpace *as;
  189     hwaddr base;
  190     uint16_t sub_section[];
  191 } subpage_t;
  192
      /* Fixed indices into a PhysPageMap's sections array.  UNASSIGNED is the
       * fallback returned by phys_page_find(); NOTDIRTY and ROM are also
       * OR-ed into iotlb values by memory_region_section_get_iotlb().
       */
  193 #define PHYS_SECTION_UNASSIGNED 0
  194 #define PHYS_SECTION_NOTDIRTY 1
  195 #define PHYS_SECTION_ROM 2
  196 #define PHYS_SECTION_WATCH 3
 197
      /* Forward declarations of file-local helpers.  tcg_commit() is installed
       * as the listener commit hook in cpu_address_space_init().
       */
  198 static void io_mem_init(void);
  199 static void memory_map_init(void);
  200 static void tcg_commit(MemoryListener *listener);
  201
      /* Region tested for in memory_region_is_unassigned(); presumably the
       * one behind PHYS_SECTION_WATCH -- confirm. */
  202 static MemoryRegion io_mem_watch;
 203
  204 /**
  205  * CPUAddressSpace: all the information a CPU needs about an AddressSpace
  206  * @cpu: the CPU whose AddressSpace this is
  207  * @as: the AddressSpace itself
  208  * @memory_dispatch: its dispatch pointer (cached, RCU protected)
  209  * @tcg_as_listener: listener for tracking changes to the AddressSpace
       *
       * Instances live in the CPUState.cpu_ases array, one per address space
       * index, and are filled in by cpu_address_space_init().
  210  */
  211 struct CPUAddressSpace {
  212     CPUState *cpu;
  213     AddressSpace *as;
  214     struct AddressSpaceDispatch *memory_dispatch;
  215     MemoryListener tcg_as_listener;
  216 };
 217
 218 #endif
 219
 220 #if !defined(CONFIG_USER_ONLY)
 221
 222 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 223 {
 224     static unsigned alloc_hint = 16;
 225     if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 226         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
 227         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 228         map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 229         alloc_hint = map->nodes_nb_alloc;
 230     }
 231 }
 232
 233 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 234 {
 235     unsigned i;
 236     uint32_t ret;
 237     PhysPageEntry e;
 238     PhysPageEntry *p;
 239
 240     ret = map->nodes_nb++;
 241     p = map->nodes[ret];
 242     assert(ret != PHYS_MAP_NODE_NIL);
 243     assert(ret != map->nodes_nb_alloc);
 244
 245     e.skip = leaf ? 0 : 1;
 246     e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 247     for (i = 0; i < P_L2_SIZE; ++i) {
 248         memcpy(&p[i], &e, sizeof(e));
 249     }
 250     return ret;
 251 }
 252
 253 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 254                                 hwaddr *index, hwaddr *nb, uint16_t leaf,
 255                                 int level)
 256 {
 257     PhysPageEntry *p;
 258     hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 259
 260     if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 261         lp->ptr = phys_map_node_alloc(map, level == 0);
 262     }
 263     p = map->nodes[lp->ptr];
 264     lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 265
 266     while (*nb && lp < &p[P_L2_SIZE]) {
 267         if ((*index & (step - 1)) == 0 && *nb >= step) {
 268             lp->skip = 0;
 269             lp->ptr = leaf;
 270             *index += step;
 271             *nb -= step;
 272         } else {
 273             phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 274         }
 275         ++lp;
 276     }
 277 }
 278
 279 static void phys_page_set(AddressSpaceDispatch *d,
 280                           hwaddr index, hwaddr nb,
 281                           uint16_t leaf)
 282 {
 283     /* Wildly overreserve - it doesn't matter much. */
 284     phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 285
 286     phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 287 }
 288
 289 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
 290  * and update our entry so we can skip it and go directly to the destination.
 291  */
 292 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
 293 {
 294     unsigned valid_ptr = P_L2_SIZE;
 295     int valid = 0;
 296     PhysPageEntry *p;
 297     int i;
 298
 299     if (lp->ptr == PHYS_MAP_NODE_NIL) {
 300         return;
 301     }
 302
 303     p = nodes[lp->ptr];
 304     for (i = 0; i < P_L2_SIZE; i++) {
 305         if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 306             continue;
 307         }
 308
 309         valid_ptr = i;
 310         valid++;
 311         if (p[i].skip) {
 312             phys_page_compact(&p[i], nodes);
 313         }
 314     }
 315
 316     /* We can only compress if there's only one child. */
 317     if (valid != 1) {
 318         return;
 319     }
 320
 321     assert(valid_ptr < P_L2_SIZE);
 322
 323     /* Don't compress if it won't fit in the # of bits we have. */
 324     if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 325         return;
 326     }
 327
 328     lp->ptr = p[valid_ptr].ptr;
 329     if (!p[valid_ptr].skip) {
 330         /* If our only child is a leaf, make this a leaf. */
 331         /* By design, we should have made this node a leaf to begin with so we
 332          * should never reach here.
 333          * But since it's so simple to handle this, let's do it just in case we
 334          * change this rule.
 335          */
 336         lp->skip = 0;
 337     } else {
 338         lp->skip += p[valid_ptr].skip;
 339     }
 340 }
 341
 342 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 343 {
 344     if (d->phys_map.skip) {
 345         phys_page_compact(&d->phys_map, d->map.nodes);
 346     }
 347 }
 348
 349 static inline bool section_covers_addr(const MemoryRegionSection *section,
 350                                        hwaddr addr)
 351 {
 352     /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
 353      * the section must cover the entire address space.
 354      */
 355     return int128_gethi(section->size) ||
 356            range_covers_byte(section->offset_within_address_space,
 357                              int128_getlo(section->size), addr);
 358 }
 359
 360 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 361                                            Node *nodes, MemoryRegionSection *sections)
 362 {
 363     PhysPageEntry *p;
 364     hwaddr index = addr >> TARGET_PAGE_BITS;
 365     int i;
 366
 367     for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 368         if (lp.ptr == PHYS_MAP_NODE_NIL) {
 369             return &sections[PHYS_SECTION_UNASSIGNED];
 370         }
 371         p = nodes[lp.ptr];
 372         lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 373     }
 374
 375     if (section_covers_addr(&sections[lp.ptr], addr)) {
 376         return &sections[lp.ptr];
 377     } else {
 378         return &sections[PHYS_SECTION_UNASSIGNED];
 379     }
 380 }
 381
 382 bool memory_region_is_unassigned(MemoryRegion *mr)
 383 {
 384     return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 385         && mr != &io_mem_watch;
 386 }
 387
 388 /* Called from RCU critical section */
 389 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 390                                                         hwaddr addr,
 391                                                         bool resolve_subpage)
 392 {
 393     MemoryRegionSection *section = atomic_read(&d->mru_section);
 394     subpage_t *subpage;
 395     bool update;
 396
 397     if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
 398         section_covers_addr(section, addr)) {
 399         update = false;
 400     } else {
 401         section = phys_page_find(d->phys_map, addr, d->map.nodes,
 402                                  d->map.sections);
 403         update = true;
 404     }
 405     if (resolve_subpage && section->mr->subpage) {
 406         subpage = container_of(section->mr, subpage_t, iomem);
 407         section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 408     }
 409     if (update) {
 410         atomic_set(&d->mru_section, section);
 411     }
 412     return section;
 413 }
 414
 415 /* Called from RCU critical section */
 416 static MemoryRegionSection *
 417 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 418                                  hwaddr *plen, bool resolve_subpage)
 419 {
 420     MemoryRegionSection *section;
 421     MemoryRegion *mr;
 422     Int128 diff;
 423
 424     section = address_space_lookup_region(d, addr, resolve_subpage);
 425     /* Compute offset within MemoryRegionSection */
 426     addr -= section->offset_within_address_space;
 427
 428     /* Compute offset within MemoryRegion */
 429     *xlat = addr + section->offset_within_region;
 430
 431     mr = section->mr;
 432
 433     /* MMIO registers can be expected to perform full-width accesses based only
 434      * on their address, without considering adjacent registers that could
 435      * decode to completely different MemoryRegions.  When such registers
 436      * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 437      * regions overlap wildly.  For this reason we cannot clamp the accesses
 438      * here.
 439      *
 440      * If the length is small (as is the case for address_space_ldl/stl),
 441      * everything works fine.  If the incoming length is large, however,
 442      * the caller really has to do the clamping through memory_access_size.
 443      */
 444     if (memory_region_is_ram(mr)) {
 445         diff = int128_sub(section->size, int128_make64(addr));
 446         *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 447     }
 448     return section;
 449 }
 450
 451 /* Called from RCU critical section */
 452 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 453                                       hwaddr *xlat, hwaddr *plen,
 454                                       bool is_write)
 455 {
 456     IOMMUTLBEntry iotlb;
 457     MemoryRegionSection *section;
 458     MemoryRegion *mr;
 459
 460     for (;;) {
 461         AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 462         section = address_space_translate_internal(d, addr, &addr, plen, true);
 463         mr = section->mr;
 464
 465         if (!mr->iommu_ops) {
 466             break;
 467         }
 468
 469         iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 470         addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 471                 | (addr & iotlb.addr_mask));
 472         *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
 473         if (!(iotlb.perm & (1 << is_write))) {
 474             mr = &io_mem_unassigned;
 475             break;
 476         }
 477
 478         as = iotlb.target_as;
 479     }
 480
 481     if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 482         hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 483         *plen = MIN(page, *plen);
 484     }
 485
 486     *xlat = addr;
 487     return mr;
 488 }
 489
 490 /* Called from RCU critical section */
 491 MemoryRegionSection *
 492 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
 493                                   hwaddr *xlat, hwaddr *plen)
 494 {
 495     MemoryRegionSection *section;
 496     AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
 497
 498     section = address_space_translate_internal(d, addr, xlat, plen, false);
 499
 500     assert(!section->mr->iommu_ops);
 501     return section;
 502 }
 503 #endif
 504
 505 #if !defined(CONFIG_USER_ONLY)
 506
 507 static int cpu_common_post_load(void *opaque, int version_id)
 508 {
 509     CPUState *cpu = opaque;
 510
 511     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 512        version_id is increased. */
 513     cpu->interrupt_request &= ~0x01;
 514     tlb_flush(cpu, 1);
 515
 516     return 0;
 517 }
 518
 519 static int cpu_common_pre_load(void *opaque)
 520 {
 521     CPUState *cpu = opaque;
 522
 523     cpu->exception_index = -1;
 524
 525     return 0;
 526 }
 527
 528 static bool cpu_common_exception_index_needed(void *opaque)
 529 {
 530     CPUState *cpu = opaque;
 531
 532     return tcg_enabled() && cpu->exception_index != -1;
 533 }
 534
      /* Optional subsection carrying CPUState.exception_index; sent only when
       * cpu_common_exception_index_needed() returns true.
       */
  535 static const VMStateDescription vmstate_cpu_common_exception_index = {
  536     .name = "cpu_common/exception_index",
  537     .version_id = 1,
  538     .minimum_version_id = 1,
  539     .needed = cpu_common_exception_index_needed,
  540     .fields = (VMStateField[]) {
  541         VMSTATE_INT32(exception_index, CPUState),
  542         VMSTATE_END_OF_LIST()
  543     }
  544 };
 545
 546 static bool cpu_common_crash_occurred_needed(void *opaque)
 547 {
 548     CPUState *cpu = opaque;
 549
 550     return cpu->crash_occurred;
 551 }
 552
      /* Optional subsection carrying CPUState.crash_occurred; sent only when
       * cpu_common_crash_occurred_needed() returns true.
       */
  553 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
  554     .name = "cpu_common/crash_occurred",
  555     .version_id = 1,
  556     .minimum_version_id = 1,
  557     .needed = cpu_common_crash_occurred_needed,
  558     .fields = (VMStateField[]) {
  559         VMSTATE_BOOL(crash_occurred, CPUState),
  560         VMSTATE_END_OF_LIST()
  561     }
  562 };
 563
      /* Migration description of the target-independent CPU state: the halted
       * and interrupt_request fields plus the two optional subsections above.
       * Registered by cpu_exec_realizefn() when the CPU device has no vmsd of
       * its own.
       */
  564 const VMStateDescription vmstate_cpu_common = {
  565     .name = "cpu_common",
  566     .version_id = 1,
  567     .minimum_version_id = 1,
  568     .pre_load = cpu_common_pre_load,
  569     .post_load = cpu_common_post_load,
  570     .fields = (VMStateField[]) {
  571         VMSTATE_UINT32(halted, CPUState),
  572         VMSTATE_UINT32(interrupt_request, CPUState),
  573         VMSTATE_END_OF_LIST()
  574     },
  575     .subsections = (const VMStateDescription*[]) {
  576         &vmstate_cpu_common_exception_index,
  577         &vmstate_cpu_common_crash_occurred,
  578         NULL
  579     }
  580 };
 581
 582 #endif
 583
 584 CPUState *qemu_get_cpu(int index)
 585 {
 586     CPUState *cpu;
 587
 588     CPU_FOREACH(cpu) {
 589         if (cpu->cpu_index == index) {
 590             return cpu;
 591         }
 592     }
 593
 594     return NULL;
 595 }
 596
 597 #if !defined(CONFIG_USER_ONLY)
 598 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
 599 {
 600     CPUAddressSpace *newas;
 601
 602     /* Target code should have set num_ases before calling us */
 603     assert(asidx < cpu->num_ases);
 604
 605     if (asidx == 0) {
 606         /* address space 0 gets the convenience alias */
 607         cpu->as = as;
 608     }
 609
 610     /* KVM cannot currently support multiple address spaces. */
 611     assert(asidx == 0 || !kvm_enabled());
 612
 613     if (!cpu->cpu_ases) {
 614         cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
 615     }
 616
 617     newas = &cpu->cpu_ases[asidx];
 618     newas->cpu = cpu;
 619     newas->as = as;
 620     if (tcg_enabled()) {
 621         newas->tcg_as_listener.commit = tcg_commit;
 622         memory_listener_register(&newas->tcg_as_listener, as);
 623     }
 624 }
 625
 626 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
 627 {
 628     /* Return the AddressSpace corresponding to the specified index */
 629     return cpu->cpu_ases[asidx].as;
 630 }
 631 #endif
 632
 633 void cpu_exec_unrealizefn(CPUState *cpu)
 634 {
 635     CPUClass *cc = CPU_GET_CLASS(cpu);
 636
 637     cpu_list_remove(cpu);
 638
 639     if (cc->vmsd != NULL) {
 640         vmstate_unregister(NULL, cc->vmsd, cpu);
 641     }
 642     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 643         vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
 644     }
 645 }
 646
 647 void cpu_exec_initfn(CPUState *cpu)
 648 {
 649     cpu->as = NULL;
 650     cpu->num_ases = 0;
 651
 652 #ifndef CONFIG_USER_ONLY
 653     cpu->thread_id = qemu_get_thread_id();
 654
 655     /* This is a softmmu CPU object, so create a property for it
 656      * so users can wire up its memory. (This can't go in qom/cpu.c
 657      * because that file is compiled only once for both user-mode
 658      * and system builds.) The default if no link is set up is to use
 659      * the system address space.
 660      */
 661     object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
 662                              (Object **)&cpu->memory,
 663                              qdev_prop_allow_set_link_before_realize,
 664                              OBJ_PROP_LINK_UNREF_ON_RELEASE,
 665                              &error_abort);
 666     cpu->memory = system_memory;
 667     object_ref(OBJECT(cpu->memory));
 668 #endif
 669 }
 670
 671 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
 672 {
 673     CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
 674
 675     cpu_list_add(cpu);
 676
 677 #ifndef CONFIG_USER_ONLY
 678     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 679         vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
 680     }
 681     if (cc->vmsd != NULL) {
 682         vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
 683     }
 684 #endif
 685 }
 686
 687 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 688 {
 689     /* Flush the whole TB as this will not have race conditions
 690      * even if we don't have proper locking yet.
 691      * Ideally we would just invalidate the TBs for the
 692      * specified PC.
 693      */
 694     tb_flush(cpu);
 695 }
 696
 697 #if defined(CONFIG_USER_ONLY)
      /* CONFIG_USER_ONLY stubs: the void functions do nothing and the others
       * report -ENOSYS.
       */

      /* Stub: nothing to remove. */
  698 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
  699
  700 {
  701 }
  702
      /* Stub: always fails with -ENOSYS. */
  703 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
  704                           int flags)
  705 {
  706     return -ENOSYS;
  707 }
  708
      /* Stub: does nothing. */
  709 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
  710 {
  711 }
  712
      /* Stub: always fails with -ENOSYS; *watchpoint is not written. */
  713 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
  714                           int flags, CPUWatchpoint **watchpoint)
  715 {
  716     return -ENOSYS;
  717 }
 718 #else
 719 /* Add a watchpoint.  */
 720 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 721                           int flags, CPUWatchpoint **watchpoint)
 722 {
 723     CPUWatchpoint *wp;
 724
 725     /* forbid ranges which are empty or run off the end of the address space */
 726     if (len == 0 || (addr + len - 1) < addr) {
 727         error_report("tried to set invalid watchpoint at %"
 728                      VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 729         return -EINVAL;
 730     }
 731     wp = g_malloc(sizeof(*wp));
 732
 733     wp->vaddr = addr;
 734     wp->len = len;
 735     wp->flags = flags;
 736
 737     /* keep all GDB-injected watchpoints in front */
 738     if (flags & BP_GDB) {
 739         QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 740     } else {
 741         QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 742     }
 743
 744     tlb_flush_page(cpu, addr);
 745
 746     if (watchpoint)
 747         *watchpoint = wp;
 748     return 0;
 749 }
 750
 751 /* Remove a specific watchpoint.  */
 752 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 753                           int flags)
 754 {
 755     CPUWatchpoint *wp;
 756
 757     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 758         if (addr == wp->vaddr && len == wp->len
 759                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 760             cpu_watchpoint_remove_by_ref(cpu, wp);
 761             return 0;
 762         }
 763     }
 764     return -ENOENT;
 765 }
 766
 767 /* Remove a specific watchpoint by reference.  */
 768 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 769 {
 770     QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 771
 772     tlb_flush_page(cpu, watchpoint->vaddr);
 773
 774     g_free(watchpoint);
 775 }
 776
 777 /* Remove all matching watchpoints.  */
 778 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 779 {
 780     CPUWatchpoint *wp, *next;
 781
 782     QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 783         if (wp->flags & mask) {
 784             cpu_watchpoint_remove_by_ref(cpu, wp);
 785         }
 786     }
 787 }
 788
 789 /* Return true if this watchpoint address matches the specified
 790  * access (ie the address range covered by the watchpoint overlaps
 791  * partially or completely with the address range covered by the
 792  * access).
 793  */
 794 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 795                                                   vaddr addr,
 796                                                   vaddr len)
 797 {
 798     /* We know the lengths are non-zero, but a little caution is
 799      * required to avoid errors in the case where the range ends
 800      * exactly at the top of the address space and so addr + len
 801      * wraps round to zero.
 802      */
 803     vaddr wpend = wp->vaddr + wp->len - 1;
 804     vaddr addrend = addr + len - 1;
 805
 806     return !(addr > wpend || wp->vaddr > addrend);
 807 }
 808
 809 #endif
 810
 811 /* Add a breakpoint.  */
 812 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 813                           CPUBreakpoint **breakpoint)
 814 {
 815     CPUBreakpoint *bp;
 816
 817     bp = g_malloc(sizeof(*bp));
 818
 819     bp->pc = pc;
 820     bp->flags = flags;
 821
 822     /* keep all GDB-injected breakpoints in front */
 823     if (flags & BP_GDB) {
 824         QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 825     } else {
 826         QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 827     }
 828
 829     breakpoint_invalidate(cpu, pc);
 830
 831     if (breakpoint) {
 832         *breakpoint = bp;
 833     }
 834     return 0;
 835 }
 836
 837 /* Remove a specific breakpoint.  */
 838 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 839 {
 840     CPUBreakpoint *bp;
 841
 842     QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 843         if (bp->pc == pc && bp->flags == flags) {
 844             cpu_breakpoint_remove_by_ref(cpu, bp);
 845             return 0;
 846         }
 847     }
 848     return -ENOENT;
 849 }
 850
 851 /* Remove a specific breakpoint by reference.  */
 852 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 853 {
 854     QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 855
 856     breakpoint_invalidate(cpu, breakpoint->pc);
 857
 858     g_free(breakpoint);
 859 }
 860
 861 /* Remove all matching breakpoints. */
 862 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 863 {
 864     CPUBreakpoint *bp, *next;
 865
 866     QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 867         if (bp->flags & mask) {
 868             cpu_breakpoint_remove_by_ref(cpu, bp);
 869         }
 870     }
 871 }
 872
 873 /* enable or disable single step mode. EXCP_DEBUG is returned by the
 874    CPU loop after each instruction */
 875 void cpu_single_step(CPUState *cpu, int enabled)
 876 {
 877     if (cpu->singlestep_enabled != enabled) {
 878         cpu->singlestep_enabled = enabled;
 879         if (kvm_enabled()) {
 880             kvm_update_guest_debug(cpu, 0);
 881         } else {
 882             /* must flush all the translated code to avoid inconsistencies */
 883             /* XXX: only flush what is necessary */
 884             tb_flush(cpu);
 885         }
 886     }
 887 }
 888
 889 void cpu_abort(CPUState *cpu, const char *fmt, ...)
 890 {
 891     va_list ap;
 892     va_list ap2;
 893
 894     va_start(ap, fmt);
 895     va_copy(ap2, ap);
 896     fprintf(stderr, "qemu: fatal: ");
 897     vfprintf(stderr, fmt, ap);
 898     fprintf(stderr, "\n");
 899     cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 900     if (qemu_log_separate()) {
 901         qemu_log_lock();
 902         qemu_log("qemu: fatal: ");
 903         qemu_log_vprintf(fmt, ap2);
 904         qemu_log("\n");
 905         log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 906         qemu_log_flush();
 907         qemu_log_unlock();
 908         qemu_log_close();
 909     }
 910     va_end(ap2);
 911     va_end(ap);
 912     replay_finish();
 913 #if defined(CONFIG_USER_ONLY)
 914     {
 915         struct sigaction act;
 916         sigfillset(&act.sa_mask);
 917         act.sa_handler = SIG_DFL;
 918         sigaction(SIGABRT, &act, NULL);
 919     }
 920 #endif
 921     abort();
 922 }
 923
 924 #if !defined(CONFIG_USER_ONLY)
 925 /* Called from RCU critical section */
 926 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 927 {
 928     RAMBlock *block;
 929
 930     block = atomic_rcu_read(&ram_list.mru_block);
 931     if (block && addr - block->offset < block->max_length) {
 932         return block;
 933     }
 934     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 935         if (addr - block->offset < block->max_length) {
 936             goto found;
 937         }
 938     }
 939
 940     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
 941     abort();
 942
 943 found:
 944     /* It is safe to write mru_block outside the iothread lock.  This
 945      * is what happens:
 946      *
 947      *     mru_block = xxx
 948      *     rcu_read_unlock()
 949      *                                        xxx removed from list
 950      *                  rcu_read_lock()
 951      *                  read mru_block
 952      *                                        mru_block = NULL;
 953      *                                        call_rcu(reclaim_ramblock, xxx);
 954      *                  rcu_read_unlock()
 955      *
 956      * atomic_rcu_set is not needed here.  The block was already published
 957      * when it was placed into the list.  Here we're just making an extra
 958      * copy of the pointer.
 959      */
 960     ram_list.mru_block = block;
 961     return block;
 962 }
 963
 964 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 965 {
 966     CPUState *cpu;
 967     ram_addr_t start1;
 968     RAMBlock *block;
 969     ram_addr_t end;
 970
 971     end = TARGET_PAGE_ALIGN(start + length);
 972     start &= TARGET_PAGE_MASK;
 973
 974     rcu_read_lock();
 975     block = qemu_get_ram_block(start);
 976     assert(block == qemu_get_ram_block(end - 1));
 977     start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
 978     CPU_FOREACH(cpu) {
 979         tlb_reset_dirty(cpu, start1, length);
 980     }
 981     rcu_read_unlock();
 982 }
 983
 984 /* Note: start and end must be within the same ram block.  */
 985 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
 986                                               ram_addr_t length,
 987                                               unsigned client)
 988 {
 989     DirtyMemoryBlocks *blocks;
 990     unsigned long end, page;
 991     bool dirty = false;
 992
 993     if (length == 0) {
 994         return false;
 995     }
 996
 997     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
 998     page = start >> TARGET_PAGE_BITS;
 999
1000     rcu_read_lock();
1001
1002     blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1003
1004     while (page < end) {
1005         unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1006         unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1007         unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1008
1009         dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1010                                               offset, num);
1011         page += num;
1012     }
1013
1014     rcu_read_unlock();
1015
1016     if (dirty && tcg_enabled()) {
1017         tlb_reset_dirty_range_all(start, length);
1018     }
1019
1020     return dirty;
1021 }
1022
1023 /* Called from RCU critical section */
1024 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1025                                        MemoryRegionSection *section,
1026                                        target_ulong vaddr,
1027                                        hwaddr paddr, hwaddr xlat,
1028                                        int prot,
1029                                        target_ulong *address)
1030 {
1031     hwaddr iotlb;
1032     CPUWatchpoint *wp;
1033
1034     if (memory_region_is_ram(section->mr)) {
1035         /* Normal RAM.  */
1036         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1037         if (!section->readonly) {
1038             iotlb |= PHYS_SECTION_NOTDIRTY;
1039         } else {
1040             iotlb |= PHYS_SECTION_ROM;
1041         }
1042     } else {
1043         AddressSpaceDispatch *d;
1044
1045         d = atomic_rcu_read(&section->address_space->dispatch);
1046         iotlb = section - d->map.sections;
1047         iotlb += xlat;
1048     }
1049
1050     /* Make accesses to pages with watchpoints go via the
1051        watchpoint trap routines.  */
1052     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1053         if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1054             /* Avoid trapping reads of pages with a write breakpoint. */
1055             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1056                 iotlb = PHYS_SECTION_WATCH + paddr;
1057                 *address |= TLB_MMIO;
1058                 break;
1059             }
1060         }
1061     }
1062
1063     return iotlb;
1064 }
1065 #endif /* defined(CONFIG_USER_ONLY) */
1066
1067 #if !defined(CONFIG_USER_ONLY)
1068
/* Forward declarations of the subpage helpers used by register_subpage(). */
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

/*
 * Allocator called by ram_block_add() for RAM blocks that have no host
 * buffer yet.  Defaults to qemu_anon_ram_alloc; phys_mem_set_alloc()
 * replaces it.
 */
static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                               qemu_anon_ram_alloc;
1075
/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 *
 * @alloc: function stored in phys_mem_alloc; it receives the size to
 *         allocate and a pointer to an alignment value.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}
1085
1086 static uint16_t phys_section_add(PhysPageMap *map,
1087                                  MemoryRegionSection *section)
1088 {
1089     /* The physical section number is ORed with a page-aligned
1090      * pointer to produce the iotlb entries.  Thus it should
1091      * never overflow into the page-aligned value.
1092      */
1093     assert(map->sections_nb < TARGET_PAGE_SIZE);
1094
1095     if (map->sections_nb == map->sections_nb_alloc) {
1096         map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1097         map->sections = g_renew(MemoryRegionSection, map->sections,
1098                                 map->sections_nb_alloc);
1099     }
1100     map->sections[map->sections_nb] = *section;
1101     memory_region_ref(section->mr);
1102     return map->sections_nb++;
1103 }
1104
1105 static void phys_section_destroy(MemoryRegion *mr)
1106 {
1107     bool have_sub_page = mr->subpage;
1108
1109     memory_region_unref(mr);
1110
1111     if (have_sub_page) {
1112         subpage_t *subpage = container_of(mr, subpage_t, iomem);
1113         object_unref(OBJECT(&subpage->iomem));
1114         g_free(subpage);
1115     }
1116 }
1117
1118 static void phys_sections_free(PhysPageMap *map)
1119 {
1120     while (map->sections_nb > 0) {
1121         MemoryRegionSection *section = &map->sections[--map->sections_nb];
1122         phys_section_destroy(section->mr);
1123     }
1124     g_free(map->sections);
1125     g_free(map->nodes);
1126 }
1127
1128 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1129 {
1130     subpage_t *subpage;
1131     hwaddr base = section->offset_within_address_space
1132         & TARGET_PAGE_MASK;
1133     MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1134                                                    d->map.nodes, d->map.sections);
1135     MemoryRegionSection subsection = {
1136         .offset_within_address_space = base,
1137         .size = int128_make64(TARGET_PAGE_SIZE),
1138     };
1139     hwaddr start, end;
1140
1141     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1142
1143     if (!(existing->mr->subpage)) {
1144         subpage = subpage_init(d->as, base);
1145         subsection.address_space = d->as;
1146         subsection.mr = &subpage->iomem;
1147         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1148                       phys_section_add(&d->map, &subsection));
1149     } else {
1150         subpage = container_of(existing->mr, subpage_t, iomem);
1151     }
1152     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1153     end = start + int128_get64(section->size) - 1;
1154     subpage_register(subpage, start, end,
1155                      phys_section_add(&d->map, section));
1156 }
1157
1158
1159 static void register_multipage(AddressSpaceDispatch *d,
1160                                MemoryRegionSection *section)
1161 {
1162     hwaddr start_addr = section->offset_within_address_space;
1163     uint16_t section_index = phys_section_add(&d->map, section);
1164     uint64_t num_pages = int128_get64(int128_rshift(section->size,
1165                                                     TARGET_PAGE_BITS));
1166
1167     assert(num_pages);
1168     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1169 }
1170
/*
 * Listener callback: add @section to the dispatch being built for the
 * address space that owns @listener (as->next_dispatch).
 *
 * The section is cut into pieces: an unaligned head (if any) and any
 * piece smaller than a page go through register_subpage(); runs of whole,
 * page-aligned pages go through register_multipage().
 */
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    /* "now" is the piece being registered, "remain" what is left from it on. */
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        /* Unaligned start: register up to the next page boundary. */
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        /* Aligned start: nothing consumed yet. */
        now.size = int128_zero();
    }
    /* Loop until the piece just registered is all that remained. */
    while (int128_ne(remain.size, now.size)) {
        /* Advance past the piece registered in the previous step. */
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            /* Less than a page left: final subpage piece. */
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            /* Still unaligned: register a page-sized subpage piece. */
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            /* Aligned: register as many whole pages as fit. */
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}
1203
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1209
/* Acquire ram_list.mutex. */
void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}
1214
/* Release ram_list.mutex. */
void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}
1219
1220 #ifdef __linux__
/*
 * Return the size of the file behind @fd, found by seeking to its end,
 * or -errno if the seek fails.  The file offset is left at the end.
 */
static int64_t get_file_size(int fd)
{
    int64_t end_pos = lseek(fd, 0, SEEK_END);

    return end_pos < 0 ? -errno : end_pos;
}
1229
1230 static void *file_ram_alloc(RAMBlock *block,
1231                             ram_addr_t memory,
1232                             const char *path,
1233                             Error **errp)
1234 {
1235     bool unlink_on_error = false;
1236     char *filename;
1237     char *sanitized_name;
1238     char *c;
1239     void *area = MAP_FAILED;
1240     int fd = -1;
1241     int64_t file_size;
1242
1243     if (kvm_enabled() && !kvm_has_sync_mmu()) {
1244         error_setg(errp,
1245                    "host lacks kvm mmu notifiers, -mem-path unsupported");
1246         return NULL;
1247     }
1248
1249     for (;;) {
1250         fd = open(path, O_RDWR);
1251         if (fd >= 0) {
1252             /* @path names an existing file, use it */
1253             break;
1254         }
1255         if (errno == ENOENT) {
1256             /* @path names a file that doesn't exist, create it */
1257             fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1258             if (fd >= 0) {
1259                 unlink_on_error = true;
1260                 break;
1261             }
1262         } else if (errno == EISDIR) {
1263             /* @path names a directory, create a file there */
1264             /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1265             sanitized_name = g_strdup(memory_region_name(block->mr));
1266             for (c = sanitized_name; *c != '\0'; c++) {
1267                 if (*c == '/') {
1268                     *c = '_';
1269                 }
1270             }
1271
1272             filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1273                                        sanitized_name);
1274             g_free(sanitized_name);
1275
1276             fd = mkstemp(filename);
1277             if (fd >= 0) {
1278                 unlink(filename);
1279                 g_free(filename);
1280                 break;
1281             }
1282             g_free(filename);
1283         }
1284         if (errno != EEXIST && errno != EINTR) {
1285             error_setg_errno(errp, errno,
1286                              "can't open backing store %s for guest RAM",
1287                              path);
1288             goto error;
1289         }
1290         /*
1291          * Try again on EINTR and EEXIST.  The latter happens when
1292          * something else creates the file between our two open().
1293          */
1294     }
1295
1296     block->page_size = qemu_fd_getpagesize(fd);
1297     block->mr->align = block->page_size;
1298 #if defined(__s390x__)
1299     if (kvm_enabled()) {
1300         block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1301     }
1302 #endif
1303
1304     file_size = get_file_size(fd);
1305
1306     if (memory < block->page_size) {
1307         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1308                    "or larger than page size 0x%zx",
1309                    memory, block->page_size);
1310         goto error;
1311     }
1312
1313     if (file_size > 0 && file_size < memory) {
1314         error_setg(errp, "backing store %s size 0x%" PRIx64
1315                    " does not match 'size' option 0x" RAM_ADDR_FMT,
1316                    path, file_size, memory);
1317         goto error;
1318     }
1319
1320     memory = ROUND_UP(memory, block->page_size);
1321
1322     /*
1323      * ftruncate is not supported by hugetlbfs in older
1324      * hosts, so don't bother bailing out on errors.
1325      * If anything goes wrong with it under other filesystems,
1326      * mmap will fail.
1327      *
1328      * Do not truncate the non-empty backend file to avoid corrupting
1329      * the existing data in the file. Disabling shrinking is not
1330      * enough. For example, the current vNVDIMM implementation stores
1331      * the guest NVDIMM labels at the end of the backend file. If the
1332      * backend file is later extended, QEMU will not be able to find
1333      * those labels. Therefore, extending the non-empty backend file
1334      * is disabled as well.
1335      */
1336     if (!file_size && ftruncate(fd, memory)) {
1337         perror("ftruncate");
1338     }
1339
1340     area = qemu_ram_mmap(fd, memory, block->mr->align,
1341                          block->flags & RAM_SHARED);
1342     if (area == MAP_FAILED) {
1343         error_setg_errno(errp, errno,
1344                          "unable to map backing store for guest RAM");
1345         goto error;
1346     }
1347
1348     if (mem_prealloc) {
1349         os_mem_prealloc(fd, area, memory, errp);
1350         if (errp && *errp) {
1351             goto error;
1352         }
1353     }
1354
1355     block->fd = fd;
1356     return area;
1357
1358 error:
1359     if (area != MAP_FAILED) {
1360         qemu_ram_munmap(area, memory);
1361     }
1362     if (unlink_on_error) {
1363         unlink(path);
1364     }
1365     if (fd != -1) {
1366         close(fd);
1367     }
1368     return NULL;
1369 }
1370 #endif
1371
1372 /* Called with the ramlist lock held.  */
1373 static ram_addr_t find_ram_offset(ram_addr_t size)
1374 {
1375     RAMBlock *block, *next_block;
1376     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1377
1378     assert(size != 0); /* it would hand out same offset multiple times */
1379
1380     if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1381         return 0;
1382     }
1383
1384     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1385         ram_addr_t end, next = RAM_ADDR_MAX;
1386
1387         end = block->offset + block->max_length;
1388
1389         QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1390             if (next_block->offset >= end) {
1391                 next = MIN(next, next_block->offset);
1392             }
1393         }
1394         if (next - end >= size && next - end < mingap) {
1395             offset = end;
1396             mingap = next - end;
1397         }
1398     }
1399
1400     if (offset == RAM_ADDR_MAX) {
1401         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1402                 (uint64_t)size);
1403         abort();
1404     }
1405
1406     return offset;
1407 }
1408
1409 ram_addr_t last_ram_offset(void)
1410 {
1411     RAMBlock *block;
1412     ram_addr_t last = 0;
1413
1414     rcu_read_lock();
1415     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1416         last = MAX(last, block->offset + block->max_length);
1417     }
1418     rcu_read_unlock();
1419     return last;
1420 }
1421
1422 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1423 {
1424     int ret;
1425
1426     /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1427     if (!machine_dump_guest_core(current_machine)) {
1428         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1429         if (ret) {
1430             perror("qemu_madvise");
1431             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1432                             "but dump_guest_core=off specified\n");
1433         }
1434     }
1435 }
1436
/* Return the id string stored in @rb (the block's own buffer, not a copy). */
const char *qemu_ram_get_idstr(RAMBlock *rb)
{
    return rb->idstr;
}
1441
1442 /* Called with iothread lock held.  */
1443 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1444 {
1445     RAMBlock *block;
1446
1447     assert(new_block);
1448     assert(!new_block->idstr[0]);
1449
1450     if (dev) {
1451         char *id = qdev_get_dev_path(dev);
1452         if (id) {
1453             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1454             g_free(id);
1455         }
1456     }
1457     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1458
1459     rcu_read_lock();
1460     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1461         if (block != new_block &&
1462             !strcmp(block->idstr, new_block->idstr)) {
1463             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1464                     new_block->idstr);
1465             abort();
1466         }
1467     }
1468     rcu_read_unlock();
1469 }
1470
1471 /* Called with iothread lock held.  */
1472 void qemu_ram_unset_idstr(RAMBlock *block)
1473 {
1474     /* FIXME: arch_init.c assumes that this is not called throughout
1475      * migration.  Ignore the problem since hot-unplug during migration
1476      * does not work anyway.
1477      */
1478     if (block) {
1479         memset(block->idstr, 0, sizeof(block->idstr));
1480     }
1481 }
1482
/* Return the page size recorded for @rb when the block was created. */
size_t qemu_ram_pagesize(RAMBlock *rb)
{
    return rb->page_size;
}
1487
1488 static int memory_try_enable_merging(void *addr, size_t len)
1489 {
1490     if (!machine_mem_merge(current_machine)) {
1491         /* disabled by the user */
1492         return 0;
1493     }
1494
1495     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1496 }
1497
1498 /* Only legal before guest might have detected the memory size: e.g. on
1499  * incoming migration, or right after reset.
1500  *
1501  * As memory core doesn't know how is memory accessed, it is up to
1502  * resize callback to update device state and/or add assertions to detect
1503  * misuse, if necessary.
1504  */
1505 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1506 {
1507     assert(block);
1508
1509     newsize = HOST_PAGE_ALIGN(newsize);
1510
1511     if (block->used_length == newsize) {
1512         return 0;
1513     }
1514
1515     if (!(block->flags & RAM_RESIZEABLE)) {
1516         error_setg_errno(errp, EINVAL,
1517                          "Length mismatch: %s: 0x" RAM_ADDR_FMT
1518                          " in != 0x" RAM_ADDR_FMT, block->idstr,
1519                          newsize, block->used_length);
1520         return -EINVAL;
1521     }
1522
1523     if (block->max_length < newsize) {
1524         error_setg_errno(errp, EINVAL,
1525                          "Length too large: %s: 0x" RAM_ADDR_FMT
1526                          " > 0x" RAM_ADDR_FMT, block->idstr,
1527                          newsize, block->max_length);
1528         return -EINVAL;
1529     }
1530
1531     cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1532     block->used_length = newsize;
1533     cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1534                                         DIRTY_CLIENTS_ALL);
1535     memory_region_set_size(block->mr, newsize);
1536     if (block->resized) {
1537         block->resized(block->idstr, newsize, block->host);
1538     }
1539     return 0;
1540 }
1541
/*
 * Grow the per-client dirty bitmap block arrays so that they cover
 * @new_ram_size instead of @old_ram_size.  The sizes are converted to a
 * number of bitmap blocks by dividing by DIRTY_MEMORY_BLOCK_SIZE.
 *
 * For each client a new array is allocated, the existing bitmap pointers
 * are copied over, new bitmaps are added for the extra blocks, the new
 * array is published with atomic_rcu_set() and the old array is freed
 * through RCU.
 *
 * Called with ram_list.mutex held
 */
static void dirty_memory_extend(ram_addr_t old_ram_size,
                                ram_addr_t new_ram_size)
{
    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    int i;

    /* Only need to extend if block count increased */
    if (new_num_blocks <= old_num_blocks) {
        return;
    }

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        DirtyMemoryBlocks *old_blocks;
        DirtyMemoryBlocks *new_blocks;
        int j;

        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
        /* Header plus one bitmap pointer per block. */
        new_blocks = g_malloc(sizeof(*new_blocks) +
                              sizeof(new_blocks->blocks[0]) * new_num_blocks);

        /* Existing bitmaps are shared with the old array, not copied. */
        if (old_num_blocks) {
            memcpy(new_blocks->blocks, old_blocks->blocks,
                   old_num_blocks * sizeof(old_blocks->blocks[0]));
        }

        for (j = old_num_blocks; j < new_num_blocks; j++) {
            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
        }

        /* Publish the fully initialised array. */
        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);

        /* Only the old array itself is freed; its bitmaps live on. */
        if (old_blocks) {
            g_free_rcu(old_blocks, rcu);
        }
    }
}
1582
/*
 * Insert @new_block into ram_list.
 *
 * Picks an offset with find_ram_offset(), allocates host memory if the
 * block has none (through Xen or phys_mem_alloc), extends the migration
 * and dirty bitmaps if the RAM address space grew, links the block into
 * the list, marks its used range dirty for all clients and applies the
 * madvise settings to the host memory.
 *
 * On allocation failure @errp is set and the block is not inserted; the
 * caller still owns @new_block.
 */
static void ram_block_add(RAMBlock *new_block, Error **errp)
{
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    ram_addr_t old_ram_size, new_ram_size;
    Error *err = NULL;

    /* RAM size in target pages before the new block is added. */
    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr, &err);
            if (err) {
                error_propagate(errp, err);
                qemu_mutex_unlock_ramlist();
                return;
            }
        } else {
            new_block->host = phys_mem_alloc(new_block->max_length,
                                             &new_block->mr->align);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return;
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    /* Extend the bitmaps only if the new block ends past the old end. */
    new_ram_size = MAX(old_ram_size,
              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        migration_bitmap_extend(old_ram_size, new_ram_size);
        dirty_memory_extend(old_ram_size, new_ram_size);
    }
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    if (block) {
        /* Found the first smaller block: insert in front of it. */
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        /* No smaller block: append after the last element. */
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    /* Reset the most-recently-used block cache after changing the list. */
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);

    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
    }
}
1659
1660 #ifdef __linux__
1661 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1662                                    bool share, const char *mem_path,
1663                                    Error **errp)
1664 {
1665     RAMBlock *new_block;
1666     Error *local_err = NULL;
1667
1668     if (xen_enabled()) {
1669         error_setg(errp, "-mem-path not supported with Xen");
1670         return NULL;
1671     }
1672
1673     if (phys_mem_alloc != qemu_anon_ram_alloc) {
1674         /*
1675          * file_ram_alloc() needs to allocate just like
1676          * phys_mem_alloc, but we haven't bothered to provide
1677          * a hook there.
1678          */
1679         error_setg(errp,
1680                    "-mem-path not supported with this accelerator");
1681         return NULL;
1682     }
1683
1684     size = HOST_PAGE_ALIGN(size);
1685     new_block = g_malloc0(sizeof(*new_block));
1686     new_block->mr = mr;
1687     new_block->used_length = size;
1688     new_block->max_length = size;
1689     new_block->flags = share ? RAM_SHARED : 0;
1690     new_block->host = file_ram_alloc(new_block, size,
1691                                      mem_path, errp);
1692     if (!new_block->host) {
1693         g_free(new_block);
1694         return NULL;
1695     }
1696
1697     ram_block_add(new_block, &local_err);
1698     if (local_err) {
1699         g_free(new_block);
1700         error_propagate(errp, local_err);
1701         return NULL;
1702     }
1703     return new_block;
1704 }
1705 #endif
1706
1707 static
1708 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1709                                   void (*resized)(const char*,
1710                                                   uint64_t length,
1711                                                   void *host),
1712                                   void *host, bool resizeable,
1713                                   MemoryRegion *mr, Error **errp)
1714 {
1715     RAMBlock *new_block;
1716     Error *local_err = NULL;
1717
1718     size = HOST_PAGE_ALIGN(size);
1719     max_size = HOST_PAGE_ALIGN(max_size);
1720     new_block = g_malloc0(sizeof(*new_block));
1721     new_block->mr = mr;
1722     new_block->resized = resized;
1723     new_block->used_length = size;
1724     new_block->max_length = max_size;
1725     assert(max_size >= size);
1726     new_block->fd = -1;
1727     new_block->page_size = getpagesize();
1728     new_block->host = host;
1729     if (host) {
1730         new_block->flags |= RAM_PREALLOC;
1731     }
1732     if (resizeable) {
1733         new_block->flags |= RAM_RESIZEABLE;
1734     }
1735     ram_block_add(new_block, &local_err);
1736     if (local_err) {
1737         g_free(new_block);
1738         error_propagate(errp, local_err);
1739         return NULL;
1740     }
1741     return new_block;
1742 }
1743
/*
 * Create a fixed-size RAM block of @size bytes for @mr, passing @host as
 * the caller-provided backing memory.  Returns NULL with @errp set on
 * failure.
 */
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}
1749
/*
 * Create a fixed-size RAM block of @size bytes for @mr with no
 * caller-provided host memory.  Returns NULL with @errp set on failure.
 */
RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}
1754
/*
 * Create a resizeable RAM block for @mr with initial size @size and
 * maximum size @maxsz.  @resized is stored in the block as its resize
 * callback.  Returns NULL with @errp set on failure.
 */
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                     void (*resized)(const char*,
                                                     uint64_t length,
                                                     void *host),
                                     MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
}
1763
1764 static void reclaim_ramblock(RAMBlock *block)
1765 {
1766     if (block->flags & RAM_PREALLOC) {
1767         ;
1768     } else if (xen_enabled()) {
1769         xen_invalidate_map_cache_entry(block->host);
1770 #ifndef _WIN32
1771     } else if (block->fd >= 0) {
1772         qemu_ram_munmap(block->host, block->max_length);
1773         close(block->fd);
1774 #endif
1775     } else {
1776         qemu_anon_ram_free(block->host, block->max_length);
1777     }
1778     g_free(block);
1779 }
1780
/*
 * Remove @block from ram_list and schedule it for reclamation through
 * RCU (reclaim_ramblock).  A NULL @block is ignored.
 */
void qemu_ram_free(RAMBlock *block)
{
    if (!block) {
        return;
    }

    qemu_mutex_lock_ramlist();
    QLIST_REMOVE_RCU(block, next);
    /* Reset the most-recently-used block cache after changing the list. */
    ram_list.mru_block = NULL;
    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    /* The block is freed by reclaim_ramblock() via call_rcu(). */
    call_rcu(block, reclaim_ramblock, rcu);
    qemu_mutex_unlock_ramlist();
}
1796
1797 #ifndef _WIN32
1798 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1799 {
1800     RAMBlock *block;
1801     ram_addr_t offset;
1802     int flags;
1803     void *area, *vaddr;
1804
1805     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1806         offset = addr - block->offset;
1807         if (offset < block->max_length) {
1808             vaddr = ramblock_ptr(block, offset);
1809             if (block->flags & RAM_PREALLOC) {
1810                 ;
1811             } else if (xen_enabled()) {
1812                 abort();
1813             } else {
1814                 flags = MAP_FIXED;
1815                 if (block->fd >= 0) {
1816                     flags |= (block->flags & RAM_SHARED ?
1817                               MAP_SHARED : MAP_PRIVATE);
1818                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1819                                 flags, block->fd, offset);
1820                 } else {
1821                     /*
1822                      * Remap needs to match alloc.  Accelerators that
1823                      * set phys_mem_alloc never remap.  If they did,
1824                      * we'd need a remap hook here.
1825                      */
1826                     assert(phys_mem_alloc == qemu_anon_ram_alloc);
1827
1828                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1829                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1830                                 flags, -1, 0);
1831                 }
1832                 if (area != vaddr) {
1833                     fprintf(stderr, "Could not remap addr: "
1834                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1835                             length, addr);
1836                     exit(1);
1837                 }
1838                 memory_try_enable_merging(vaddr, length);
1839                 qemu_ram_setup_dump(vaddr, length);
1840             }
1841         }
1842     }
1843 }
1844 #endif /* !_WIN32 */
1845
1846 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1847  * This should not be used for general purpose DMA.  Use address_space_map
1848  * or address_space_rw instead. For local memory (e.g. video ram) that the
1849  * device owns, use memory_region_get_ram_ptr.
1850  *
1851  * Called within RCU critical section.
1852  */
1853 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1854 {
1855     RAMBlock *block = ram_block;
1856
1857     if (block == NULL) {
1858         block = qemu_get_ram_block(addr);
1859         addr -= block->offset;
1860     }
1861
1862     if (xen_enabled() && block->host == NULL) {
1863         /* We need to check if the requested address is in the RAM
1864          * because we don't want to map the entire memory in QEMU.
1865          * In that case just map until the end of the page.
1866          */
1867         if (block->offset == 0) {
1868             return xen_map_cache(addr, 0, 0);
1869         }
1870
1871         block->host = xen_map_cache(block->offset, block->max_length, 1);
1872     }
1873     return ramblock_ptr(block, addr);
1874 }
1875
1876 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1877  * but takes a size argument.
1878  *
1879  * Called within RCU critical section.
1880  */
1881 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1882                                  hwaddr *size)
1883 {
1884     RAMBlock *block = ram_block;
1885     if (*size == 0) {
1886         return NULL;
1887     }
1888
1889     if (block == NULL) {
1890         block = qemu_get_ram_block(addr);
1891         addr -= block->offset;
1892     }
1893     *size = MIN(*size, block->max_length - addr);
1894
1895     if (xen_enabled() && block->host == NULL) {
1896         /* We need to check if the requested address is in the RAM
1897          * because we don't want to map the entire memory in QEMU.
1898          * In that case just map the requested area.
1899          */
1900         if (block->offset == 0) {
1901             return xen_map_cache(addr, *size, 1);
1902         }
1903
1904         block->host = xen_map_cache(block->offset, block->max_length, 1);
1905     }
1906
1907     return ramblock_ptr(block, addr);
1908 }
1909
1910 /*
1911  * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1912  * in that RAMBlock.
1913  *
1914  * ptr: Host pointer to look up
1915  * round_offset: If true round the result offset down to a page boundary
1916  * *ram_addr: set to result ram_addr
1917  * *offset: set to result offset within the RAMBlock
1918  *
1919  * Returns: RAMBlock (or NULL if not found)
1920  *
1921  * By the time this function returns, the returned pointer is not protected
1922  * by RCU anymore.  If the caller is not within an RCU critical section and
1923  * does not hold the iothread lock, it must have other means of protecting the
1924  * pointer, such as a reference to the region that includes the incoming
1925  * ram_addr_t.
1926  */
1927 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1928                                    ram_addr_t *offset)
1929 {
1930     RAMBlock *block;
1931     uint8_t *host = ptr;
1932
1933     if (xen_enabled()) {
1934         ram_addr_t ram_addr;
1935         rcu_read_lock();
1936         ram_addr = xen_ram_addr_from_mapcache(ptr);
1937         block = qemu_get_ram_block(ram_addr);
1938         if (block) {
1939             *offset = ram_addr - block->offset;
1940         }
1941         rcu_read_unlock();
1942         return block;
1943     }
1944
1945     rcu_read_lock();
1946     block = atomic_rcu_read(&ram_list.mru_block);
1947     if (block && block->host && host - block->host < block->max_length) {
1948         goto found;
1949     }
1950
1951     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1952         /* This case append when the block is not mapped. */
1953         if (block->host == NULL) {
1954             continue;
1955         }
1956         if (host - block->host < block->max_length) {
1957             goto found;
1958         }
1959     }
1960
1961     rcu_read_unlock();
1962     return NULL;
1963
1964 found:
1965     *offset = (host - block->host);
1966     if (round_offset) {
1967         *offset &= TARGET_PAGE_MASK;
1968     }
1969     rcu_read_unlock();
1970     return block;
1971 }
1972
1973 /*
1974  * Finds the named RAMBlock
1975  *
1976  * name: The name of RAMBlock to find
1977  *
1978  * Returns: RAMBlock (or NULL if not found)
1979  */
1980 RAMBlock *qemu_ram_block_by_name(const char *name)
1981 {
1982     RAMBlock *block;
1983
1984     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1985         if (!strcmp(name, block->idstr)) {
1986             return block;
1987         }
1988     }
1989
1990     return NULL;
1991 }
1992
1993 /* Some of the softmmu routines need to translate from a host pointer
1994    (typically a TLB entry) back to a ram offset.  */
1995 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1996 {
1997     RAMBlock *block;
1998     ram_addr_t offset;
1999
2000     block = qemu_ram_block_from_host(ptr, false, &offset);
2001     if (!block) {
2002         return RAM_ADDR_INVALID;
2003     }
2004
2005     return block->offset + offset;
2006 }
2007
2008 /* Called within RCU critical section.  */
2009 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2010                                uint64_t val, unsigned size)
2011 {
2012     bool locked = false;
2013
2014     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2015         locked = true;
2016         tb_lock();
2017         tb_invalidate_phys_page_fast(ram_addr, size);
2018     }
2019     switch (size) {
2020     case 1:
2021         stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2022         break;
2023     case 2:
2024         stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2025         break;
2026     case 4:
2027         stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2028         break;
2029     default:
2030         abort();
2031     }
2032
2033     if (locked) {
2034         tb_unlock();
2035     }
2036
2037     /* Set both VGA and migration bits for simplicity and to remove
2038      * the notdirty callback faster.
2039      */
2040     cpu_physical_memory_set_dirty_range(ram_addr, size,
2041                                         DIRTY_CLIENTS_NOCODE);
2042     /* we remove the notdirty callback only if the code has been
2043        flushed */
2044     if (!cpu_physical_memory_is_clean(ram_addr)) {
2045         tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2046     }
2047 }
2048
2049 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2050                                  unsigned size, bool is_write)
2051 {
2052     return is_write;
2053 }
2054
2055 static const MemoryRegionOps notdirty_mem_ops = {
2056     .write = notdirty_mem_write,
2057     .valid.accepts = notdirty_mem_accepts,
2058     .endianness = DEVICE_NATIVE_ENDIAN,
2059 };
2060
2061 /* Generate a debug exception if a watchpoint has been hit.  */
2062 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2063 {
2064     CPUState *cpu = current_cpu;
2065     CPUClass *cc = CPU_GET_CLASS(cpu);
2066     CPUArchState *env = cpu->env_ptr;
2067     target_ulong pc, cs_base;
2068     target_ulong vaddr;
2069     CPUWatchpoint *wp;
2070     uint32_t cpu_flags;
2071
2072     if (cpu->watchpoint_hit) {
2073         /* We re-entered the check after replacing the TB. Now raise
2074          * the debug interrupt so that is will trigger after the
2075          * current instruction. */
2076         cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2077         return;
2078     }
2079     vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2080     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2081         if (cpu_watchpoint_address_matches(wp, vaddr, len)
2082             && (wp->flags & flags)) {
2083             if (flags == BP_MEM_READ) {
2084                 wp->flags |= BP_WATCHPOINT_HIT_READ;
2085             } else {
2086                 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2087             }
2088             wp->hitaddr = vaddr;
2089             wp->hitattrs = attrs;
2090             if (!cpu->watchpoint_hit) {
2091                 if (wp->flags & BP_CPU &&
2092                     !cc->debug_check_watchpoint(cpu, wp)) {
2093                     wp->flags &= ~BP_WATCHPOINT_HIT;
2094                     continue;
2095                 }
2096                 cpu->watchpoint_hit = wp;
2097
2098                 /* The tb_lock will be reset when cpu_loop_exit or
2099                  * cpu_loop_exit_noexc longjmp back into the cpu_exec
2100                  * main loop.
2101                  */
2102                 tb_lock();
2103                 tb_check_watchpoint(cpu);
2104                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2105                     cpu->exception_index = EXCP_DEBUG;
2106                     cpu_loop_exit(cpu);
2107                 } else {
2108                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2109                     tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2110                     cpu_loop_exit_noexc(cpu);
2111                 }
2112             }
2113         } else {
2114             wp->flags &= ~BP_WATCHPOINT_HIT;
2115         }
2116     }
2117 }
2118
2119 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2120    so these check for a hit then pass through to the normal out-of-line
2121    phys routines.  */
2122 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2123                                   unsigned size, MemTxAttrs attrs)
2124 {
2125     MemTxResult res;
2126     uint64_t data;
2127     int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2128     AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2129
2130     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2131     switch (size) {
2132     case 1:
2133         data = address_space_ldub(as, addr, attrs, &res);
2134         break;
2135     case 2:
2136         data = address_space_lduw(as, addr, attrs, &res);
2137         break;
2138     case 4:
2139         data = address_space_ldl(as, addr, attrs, &res);
2140         break;
2141     default: abort();
2142     }
2143     *pdata = data;
2144     return res;
2145 }
2146
2147 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2148                                    uint64_t val, unsigned size,
2149                                    MemTxAttrs attrs)
2150 {
2151     MemTxResult res;
2152     int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2153     AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2154
2155     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2156     switch (size) {
2157     case 1:
2158         address_space_stb(as, addr, val, attrs, &res);
2159         break;
2160     case 2:
2161         address_space_stw(as, addr, val, attrs, &res);
2162         break;
2163     case 4:
2164         address_space_stl(as, addr, val, attrs, &res);
2165         break;
2166     default: abort();
2167     }
2168     return res;
2169 }
2170
2171 static const MemoryRegionOps watch_mem_ops = {
2172     .read_with_attrs = watch_mem_read,
2173     .write_with_attrs = watch_mem_write,
2174     .endianness = DEVICE_NATIVE_ENDIAN,
2175 };
2176
2177 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2178                                 unsigned len, MemTxAttrs attrs)
2179 {
2180     subpage_t *subpage = opaque;
2181     uint8_t buf[8];
2182     MemTxResult res;
2183
2184 #if defined(DEBUG_SUBPAGE)
2185     printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2186            subpage, len, addr);
2187 #endif
2188     res = address_space_read(subpage->as, addr + subpage->base,
2189                              attrs, buf, len);
2190     if (res) {
2191         return res;
2192     }
2193     switch (len) {
2194     case 1:
2195         *data = ldub_p(buf);
2196         return MEMTX_OK;
2197     case 2:
2198         *data = lduw_p(buf);
2199         return MEMTX_OK;
2200     case 4:
2201         *data = ldl_p(buf);
2202         return MEMTX_OK;
2203     case 8:
2204         *data = ldq_p(buf);
2205         return MEMTX_OK;
2206     default:
2207         abort();
2208     }
2209 }
2210
2211 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2212                                  uint64_t value, unsigned len, MemTxAttrs attrs)
2213 {
2214     subpage_t *subpage = opaque;
2215     uint8_t buf[8];
2216
2217 #if defined(DEBUG_SUBPAGE)
2218     printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2219            " value %"PRIx64"\n",
2220            __func__, subpage, len, addr, value);
2221 #endif
2222     switch (len) {
2223     case 1:
2224         stb_p(buf, value);
2225         break;
2226     case 2:
2227         stw_p(buf, value);
2228         break;
2229     case 4:
2230         stl_p(buf, value);
2231         break;
2232     case 8:
2233         stq_p(buf, value);
2234         break;
2235     default:
2236         abort();
2237     }
2238     return address_space_write(subpage->as, addr + subpage->base,
2239                                attrs, buf, len);
2240 }
2241
2242 static bool subpage_accepts(void *opaque, hwaddr addr,
2243                             unsigned len, bool is_write)
2244 {
2245     subpage_t *subpage = opaque;
2246 #if defined(DEBUG_SUBPAGE)
2247     printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2248            __func__, subpage, is_write ? 'w' : 'r', len, addr);
2249 #endif
2250
2251     return address_space_access_valid(subpage->as, addr + subpage->base,
2252                                       len, is_write);
2253 }
2254
2255 static const MemoryRegionOps subpage_ops = {
2256     .read_with_attrs = subpage_read,
2257     .write_with_attrs = subpage_write,
2258     .impl.min_access_size = 1,
2259     .impl.max_access_size = 8,
2260     .valid.min_access_size = 1,
2261     .valid.max_access_size = 8,
2262     .valid.accepts = subpage_accepts,
2263     .endianness = DEVICE_NATIVE_ENDIAN,
2264 };
2265
2266 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2267                              uint16_t section)
2268 {
2269     int idx, eidx;
2270
2271     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2272         return -1;
2273     idx = SUBPAGE_IDX(start);
2274     eidx = SUBPAGE_IDX(end);
2275 #if defined(DEBUG_SUBPAGE)
2276     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2277            __func__, mmio, start, end, idx, eidx, section);
2278 #endif
2279     for (; idx <= eidx; idx++) {
2280         mmio->sub_section[idx] = section;
2281     }
2282
2283     return 0;
2284 }
2285
2286 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2287 {
2288     subpage_t *mmio;
2289
2290     mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2291     mmio->as = as;
2292     mmio->base = base;
2293     memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2294                           NULL, TARGET_PAGE_SIZE);
2295     mmio->iomem.subpage = true;
2296 #if defined(DEBUG_SUBPAGE)
2297     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2298            mmio, base, TARGET_PAGE_SIZE);
2299 #endif
2300     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2301
2302     return mmio;
2303 }
2304
2305 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2306                               MemoryRegion *mr)
2307 {
2308     assert(as);
2309     MemoryRegionSection section = {
2310         .address_space = as,
2311         .mr = mr,
2312         .offset_within_address_space = 0,
2313         .offset_within_region = 0,
2314         .size = int128_2_64(),
2315     };
2316
2317     return phys_section_add(map, &section);
2318 }
2319
2320 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2321 {
2322     int asidx = cpu_asidx_from_attrs(cpu, attrs);
2323     CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2324     AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2325     MemoryRegionSection *sections = d->map.sections;
2326
2327     return sections[index & ~TARGET_PAGE_MASK].mr;
2328 }
2329
2330 static void io_mem_init(void)
2331 {
2332     memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2333     memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2334                           NULL, UINT64_MAX);
2335     memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2336                           NULL, UINT64_MAX);
2337     memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2338                           NULL, UINT64_MAX);
2339 }
2340
2341 static void mem_begin(MemoryListener *listener)
2342 {
2343     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2344     AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2345     uint16_t n;
2346
2347     n = dummy_section(&d->map, as, &io_mem_unassigned);
2348     assert(n == PHYS_SECTION_UNASSIGNED);
2349     n = dummy_section(&d->map, as, &io_mem_notdirty);
2350     assert(n == PHYS_SECTION_NOTDIRTY);
2351     n = dummy_section(&d->map, as, &io_mem_rom);
2352     assert(n == PHYS_SECTION_ROM);
2353     n = dummy_section(&d->map, as, &io_mem_watch);
2354     assert(n == PHYS_SECTION_WATCH);
2355
2356     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2357     d->as = as;
2358     as->next_dispatch = d;
2359 }
2360
2361 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2362 {
2363     phys_sections_free(&d->map);
2364     g_free(d);
2365 }
2366
2367 static void mem_commit(MemoryListener *listener)
2368 {
2369     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2370     AddressSpaceDispatch *cur = as->dispatch;
2371     AddressSpaceDispatch *next = as->next_dispatch;
2372
2373     phys_page_compact_all(next, next->map.nodes_nb);
2374
2375     atomic_rcu_set(&as->dispatch, next);
2376     if (cur) {
2377         call_rcu(cur, address_space_dispatch_free, rcu);
2378     }
2379 }
2380
2381 static void tcg_commit(MemoryListener *listener)
2382 {
2383     CPUAddressSpace *cpuas;
2384     AddressSpaceDispatch *d;
2385
2386     /* since each CPU stores ram addresses in its TLB cache, we must
2387        reset the modified entries */
2388     cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2389     cpu_reloading_memory_map();
2390     /* The CPU and TLB are protected by the iothread lock.
2391      * We reload the dispatch pointer now because cpu_reloading_memory_map()
2392      * may have split the RCU critical section.
2393      */
2394     d = atomic_rcu_read(&cpuas->as->dispatch);
2395     atomic_rcu_set(&cpuas->memory_dispatch, d);
2396     tlb_flush(cpuas->cpu, 1);
2397 }
2398
2399 void address_space_init_dispatch(AddressSpace *as)
2400 {
2401     as->dispatch = NULL;
2402     as->dispatch_listener = (MemoryListener) {
2403         .begin = mem_begin,
2404         .commit = mem_commit,
2405         .region_add = mem_add,
2406         .region_nop = mem_add,
2407         .priority = 0,
2408     };
2409     memory_listener_register(&as->dispatch_listener, as);
2410 }
2411
2412 void address_space_unregister(AddressSpace *as)
2413 {
2414     memory_listener_unregister(&as->dispatch_listener);
2415 }
2416
2417 void address_space_destroy_dispatch(AddressSpace *as)
2418 {
2419     AddressSpaceDispatch *d = as->dispatch;
2420
2421     atomic_rcu_set(&as->dispatch, NULL);
2422     if (d) {
2423         call_rcu(d, address_space_dispatch_free, rcu);
2424     }
2425 }
2426
2427 static void memory_map_init(void)
2428 {
2429     system_memory = g_malloc(sizeof(*system_memory));
2430
2431     memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2432     address_space_init(&address_space_memory, system_memory, "memory");
2433
2434     system_io = g_malloc(sizeof(*system_io));
2435     memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2436                           65536);
2437     address_space_init(&address_space_io, system_io, "I/O");
2438 }
2439
2440 MemoryRegion *get_system_memory(void)
2441 {
2442     return system_memory;
2443 }
2444
2445 MemoryRegion *get_system_io(void)
2446 {
2447     return system_io;
2448 }
2449
2450 #endif /* !defined(CONFIG_USER_ONLY) */
2451
2452 /* physical memory access (slow version, mainly for debug) */
2453 #if defined(CONFIG_USER_ONLY)
2454 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2455                         uint8_t *buf, int len, int is_write)
2456 {
2457     int l, flags;
2458     target_ulong page;
2459     void * p;
2460
2461     while (len > 0) {
2462         page = addr & TARGET_PAGE_MASK;
2463         l = (page + TARGET_PAGE_SIZE) - addr;
2464         if (l > len)
2465             l = len;
2466         flags = page_get_flags(page);
2467         if (!(flags & PAGE_VALID))
2468             return -1;
2469         if (is_write) {
2470             if (!(flags & PAGE_WRITE))
2471                 return -1;
2472             /* XXX: this code should not depend on lock_user */
2473             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2474                 return -1;
2475             memcpy(p, buf, l);
2476             unlock_user(p, addr, l);
2477         } else {
2478             if (!(flags & PAGE_READ))
2479                 return -1;
2480             /* XXX: this code should not depend on lock_user */
2481             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2482                 return -1;
2483             memcpy(buf, p, l);
2484             unlock_user(p, addr, 0);
2485         }
2486         len -= l;
2487         buf += l;
2488         addr += l;
2489     }
2490     return 0;
2491 }
2492
2493 #else
2494
2495 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2496                                      hwaddr length)
2497 {
2498     uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2499     addr += memory_region_get_ram_addr(mr);
2500
2501     /* No early return if dirty_log_mask is or becomes 0, because
2502      * cpu_physical_memory_set_dirty_range will still call
2503      * xen_modified_memory.
2504      */
2505     if (dirty_log_mask) {
2506         dirty_log_mask =
2507             cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2508     }
2509     if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2510         tb_lock();
2511         tb_invalidate_phys_range(addr, addr + length);
2512         tb_unlock();
2513         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2514     }
2515     cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2516 }
2517
2518 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2519 {
2520     unsigned access_size_max = mr->ops->valid.max_access_size;
2521
2522     /* Regions are assumed to support 1-4 byte accesses unless
2523        otherwise specified.  */
2524     if (access_size_max == 0) {
2525         access_size_max = 4;
2526     }
2527
2528     /* Bound the maximum access by the alignment of the address.  */
2529     if (!mr->ops->impl.unaligned) {
2530         unsigned align_size_max = addr & -addr;
2531         if (align_size_max != 0 && align_size_max < access_size_max) {
2532             access_size_max = align_size_max;
2533         }
2534     }
2535
2536     /* Don't attempt accesses larger than the maximum.  */
2537     if (l > access_size_max) {
2538         l = access_size_max;
2539     }
2540     l = pow2floor(l);
2541
2542     return l;
2543 }
2544
2545 static bool prepare_mmio_access(MemoryRegion *mr)
2546 {
2547     bool unlocked = !qemu_mutex_iothread_locked();
2548     bool release_lock = false;
2549
2550     if (unlocked && mr->global_locking) {
2551         qemu_mutex_lock_iothread();
2552         unlocked = false;
2553         release_lock = true;
2554     }
2555     if (mr->flush_coalesced_mmio) {
2556         if (unlocked) {
2557             qemu_mutex_lock_iothread();
2558         }
2559         qemu_flush_coalesced_mmio_buffer();
2560         if (unlocked) {
2561             qemu_mutex_unlock_iothread();
2562         }
2563     }
2564
2565     return release_lock;
2566 }
2567
2568 /* Called within RCU critical section.  */
2569 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2570                                                 MemTxAttrs attrs,
2571                                                 const uint8_t *buf,
2572                                                 int len, hwaddr addr1,
2573                                                 hwaddr l, MemoryRegion *mr)
2574 {
2575     uint8_t *ptr;
2576     uint64_t val;
2577     MemTxResult result = MEMTX_OK;
2578     bool release_lock = false;
2579
2580     for (;;) {
2581         if (!memory_access_is_direct(mr, true)) {
2582             release_lock |= prepare_mmio_access(mr);
2583             l = memory_access_size(mr, l, addr1);
2584             /* XXX: could force current_cpu to NULL to avoid
2585                potential bugs */
2586             switch (l) {
2587             case 8:
2588                 /* 64 bit write access */
2589                 val = ldq_p(buf);
2590                 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2591                                                        attrs);
2592                 break;
2593             case 4:
2594                 /* 32 bit write access */
2595                 val = ldl_p(buf);
2596                 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2597                                                        attrs);
2598                 break;
2599             case 2:
2600                 /* 16 bit write access */
2601                 val = lduw_p(buf);
2602                 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2603                                                        attrs);
2604                 break;
2605             case 1:
2606                 /* 8 bit write access */
2607                 val = ldub_p(buf);
2608                 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2609                                                        attrs);
2610                 break;
2611             default:
2612                 abort();
2613             }
2614         } else {
2615             /* RAM case */
2616             ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2617             memcpy(ptr, buf, l);
2618             invalidate_and_set_dirty(mr, addr1, l);
2619         }
2620
2621         if (release_lock) {
2622             qemu_mutex_unlock_iothread();
2623             release_lock = false;
2624         }
2625
2626         len -= l;
2627         buf += l;
2628         addr += l;
2629
2630         if (!len) {
2631             break;
2632         }
2633
2634         l = len;
2635         mr = address_space_translate(as, addr, &addr1, &l, true);
2636     }
2637
2638     return result;
2639 }
2640
2641 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2642                                 const uint8_t *buf, int len)
2643 {
2644     hwaddr l;
2645     hwaddr addr1;
2646     MemoryRegion *mr;
2647     MemTxResult result = MEMTX_OK;
2648
2649     if (len > 0) {
2650         rcu_read_lock();
2651         l = len;
2652         mr = address_space_translate(as, addr, &addr1, &l, true);
2653         result = address_space_write_continue(as, addr, attrs, buf, len,
2654                                               addr1, l, mr);
2655         rcu_read_unlock();
2656     }
2657
2658     return result;
2659 }
2660
2661 /* Called within RCU critical section.  */
2662 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2663                                         MemTxAttrs attrs, uint8_t *buf,
2664                                         int len, hwaddr addr1, hwaddr l,
2665                                         MemoryRegion *mr)
2666 {
2667     uint8_t *ptr;
2668     uint64_t val;
2669     MemTxResult result = MEMTX_OK;
2670     bool release_lock = false;
2671
2672     for (;;) {
2673         if (!memory_access_is_direct(mr, false)) {
2674             /* I/O case */
2675             release_lock |= prepare_mmio_access(mr);
2676             l = memory_access_size(mr, l, addr1);
2677             switch (l) {
2678             case 8:
2679                 /* 64 bit read access */
2680                 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2681                                                       attrs);
2682                 stq_p(buf, val);
2683                 break;
2684             case 4:
2685                 /* 32 bit read access */
2686                 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2687                                                       attrs);
2688                 stl_p(buf, val);
2689                 break;
2690             case 2:
2691                 /* 16 bit read access */
2692                 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2693                                                       attrs);
2694                 stw_p(buf, val);
2695                 break;
2696             case 1:
2697                 /* 8 bit read access */
2698                 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2699                                                       attrs);
2700                 stb_p(buf, val);
2701                 break;
2702             default:
2703                 abort();
2704             }
2705         } else {
2706             /* RAM case */
2707             ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2708             memcpy(buf, ptr, l);
2709         }
2710
2711         if (release_lock) {
2712             qemu_mutex_unlock_iothread();
2713             release_lock = false;
2714         }
2715
2716         len -= l;
2717         buf += l;
2718         addr += l;
2719
2720         if (!len) {
2721             break;
2722         }
2723
2724         l = len;
2725         mr = address_space_translate(as, addr, &addr1, &l, false);
2726     }
2727
2728     return result;
2729 }
2730
2731 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2732                                     MemTxAttrs attrs, uint8_t *buf, int len)
2733 {
2734     hwaddr l;
2735     hwaddr addr1;
2736     MemoryRegion *mr;
2737     MemTxResult result = MEMTX_OK;
2738
2739     if (len > 0) {
2740         rcu_read_lock();
2741         l = len;
2742         mr = address_space_translate(as, addr, &addr1, &l, false);
2743         result = address_space_read_continue(as, addr, attrs, buf, len,
2744                                              addr1, l, mr);
2745         rcu_read_unlock();
2746     }
2747
2748     return result;
2749 }
2750
2751 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2752                              uint8_t *buf, int len, bool is_write)
2753 {
2754     if (is_write) {
2755         return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2756     } else {
2757         return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2758     }
2759 }
2760
2761 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2762                             int len, int is_write)
2763 {
2764     address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2765                      buf, len, is_write);
2766 }
2767
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE = 1,    /* flush the host icache over the range */
};
2772
2773 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2774     hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2775 {
2776     hwaddr l;
2777     uint8_t *ptr;
2778     hwaddr addr1;
2779     MemoryRegion *mr;
2780
2781     rcu_read_lock();
2782     while (len > 0) {
2783         l = len;
2784         mr = address_space_translate(as, addr, &addr1, &l, true);
2785
2786         if (!(memory_region_is_ram(mr) ||
2787               memory_region_is_romd(mr))) {
2788             l = memory_access_size(mr, l, addr1);
2789         } else {
2790             /* ROM/RAM case */
2791             ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2792             switch (type) {
2793             case WRITE_DATA:
2794                 memcpy(ptr, buf, l);
2795                 invalidate_and_set_dirty(mr, addr1, l);
2796                 break;
2797             case FLUSH_CACHE:
2798                 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2799                 break;
2800             }
2801         }
2802         len -= l;
2803         buf += l;
2804         addr += l;
2805     }
2806     rcu_read_unlock();
2807 }
2808
/* used for ROM loading : can write in RAM and ROM
 *
 * Copies @len bytes from @buf to @addr in @as through the WRITE_DATA path of
 * cpu_physical_memory_write_rom_internal().
 */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}
2815
2816 void cpu_flush_icache_range(hwaddr start, int len)
2817 {
2818     /*
2819      * This function should do the same thing as an icache flush that was
2820      * triggered from within the guest. For TCG we are always cache coherent,
2821      * so there is no need to flush anything. For KVM / Xen we need to flush
2822      * the host's instruction cache at least.
2823      */
2824     if (tcg_enabled()) {
2825         return;
2826     }
2827
2828     cpu_physical_memory_write_rom_internal(&address_space_memory,
2829                                            start, NULL, len, FLUSH_CACHE);
2830 }
2831
/* State of the bounce buffer that address_space_map() hands out when the
 * target region cannot be accessed directly.
 */
typedef struct {
    MemoryRegion *mr;   /* region being bounced; referenced while mapped */
    void *buffer;       /* staging memory from qemu_memalign(); NULL after unmap */
    hwaddr addr;        /* address-space address the buffer stands in for */
    hwaddr len;         /* number of bytes staged */
    bool in_use;        /* claimed with atomic_xchg(), cleared with atomic_mb_set() */
} BounceBuffer;

/* Single shared instance used by address_space_map()/address_space_unmap(). */
static BounceBuffer bounce;
2841
/* A bottom half registered through cpu_register_map_client(); it is
 * scheduled when the map clients are notified.
 */
typedef struct MapClient {
    QEMUBH *bh;                     /* bottom half to schedule */
    QLIST_ENTRY(MapClient) link;    /* linkage in map_client_list */
} MapClient;

/* Held by the functions below whenever they touch map_client_list;
 * initialised in cpu_exec_init_all().
 */
QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
2850
/* Unlink @client from its list and free it.  The callers visible here all
 * hold map_client_list_lock around this call.
 */
static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}
2856
2857 static void cpu_notify_map_clients_locked(void)
2858 {
2859     MapClient *client;
2860
2861     while (!QLIST_EMPTY(&map_client_list)) {
2862         client = QLIST_FIRST(&map_client_list);
2863         qemu_bh_schedule(client->bh);
2864         cpu_unregister_map_client_do(client);
2865     }
2866 }
2867
2868 void cpu_register_map_client(QEMUBH *bh)
2869 {
2870     MapClient *client = g_malloc(sizeof(*client));
2871
2872     qemu_mutex_lock(&map_client_list_lock);
2873     client->bh = bh;
2874     QLIST_INSERT_HEAD(&map_client_list, client, link);
2875     if (!atomic_read(&bounce.in_use)) {
2876         cpu_notify_map_clients_locked();
2877     }
2878     qemu_mutex_unlock(&map_client_list_lock);
2879 }
2880
/* Initialisation entry point for the state in this file: creates the RAM
 * list mutex, finalizes the target page size, runs io_mem_init() and
 * memory_map_init(), and creates map_client_list_lock.
 */
void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    /* The data structures we set up here depend on knowing the page size,
     * so no more changes can be made after this point.
     * In an ideal world, nothing we did before we had finished the
     * machine setup would care about the target page size, and we could
     * do this much later, rather than requiring board models to state
     * up front what their requirements are.
     */
    finalize_target_page_bits();
    io_mem_init();
    memory_map_init();
    qemu_mutex_init(&map_client_list_lock);
}
2896
2897 void cpu_unregister_map_client(QEMUBH *bh)
2898 {
2899     MapClient *client;
2900
2901     qemu_mutex_lock(&map_client_list_lock);
2902     QLIST_FOREACH(client, &map_client_list, link) {
2903         if (client->bh == bh) {
2904             cpu_unregister_map_client_do(client);
2905             break;
2906         }
2907     }
2908     qemu_mutex_unlock(&map_client_list_lock);
2909 }
2910
/* Take map_client_list_lock and notify every registered map client. */
static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}
2917
2918 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2919 {
2920     MemoryRegion *mr;
2921     hwaddr l, xlat;
2922
2923     rcu_read_lock();
2924     while (len > 0) {
2925         l = len;
2926         mr = address_space_translate(as, addr, &xlat, &l, is_write);
2927         if (!memory_access_is_direct(mr, is_write)) {
2928             l = memory_access_size(mr, l, addr);
2929             if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2930                 return false;
2931             }
2932         }
2933
2934         len -= l;
2935         addr += l;
2936     }
2937     rcu_read_unlock();
2938     return true;
2939 }
2940
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 *
 * Also returns NULL when *plen is 0.  On success a reference is held on the
 * mapped MemoryRegion.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    void *ptr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &l, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        /* Indirect region: go through the bounce buffer.  The exchange
         * claims it; a previous value of true means it is already taken.
         */
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        /* For a read mapping the buffer is filled now; for a write mapping
         * the data is pushed out in address_space_unmap().
         */
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        rcu_read_unlock();
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;

    /* Direct region: keep extending the mapping while the following
     * translations stay in the same region and continue exactly where the
     * previous chunk ended.
     */
    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    /* plen is passed by pointer, so the callee may adjust the length. */
    ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
    rcu_read_unlock();

    return ptr;
}
3014
/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 *
 * @len is not used by this function.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        /* Direct mapping: find the region behind the host pointer. */
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = memory_region_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(mr, addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        /* Drop the reference taken in address_space_map(). */
        memory_region_unref(mr);
        return;
    }
    /* Bounce buffer: push out what the caller wrote, then release it. */
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    /* The buffer is free again: schedule every registered map client. */
    cpu_notify_map_clients();
}
3047
/* address_space_map() on the global address_space_memory. */
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}
3054
3055 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3056                                int is_write, hwaddr access_len)
3057 {
3058     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3059 }
3060
/* warning: addr must be aligned
 *
 * Load a 32-bit value from @addr in @as using the byte order selected by
 * @endian.  The transaction status is stored in *@result when @result is
 * non-NULL.
 */
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, false);
    /* Dispatch through the region if fewer than 4 bytes translated or the
     * region cannot be read directly.
     */
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* Records whether the iothread lock must be released on exit. */
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
        /* Swap only when @endian is the opposite of the target's order. */
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}
3116
/* 32-bit load via address_space_ldl_internal() with DEVICE_NATIVE_ENDIAN. */
uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}
3123
/* 32-bit load via address_space_ldl_internal() with DEVICE_LITTLE_ENDIAN. */
uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}
3130
/* 32-bit load via address_space_ldl_internal() with DEVICE_BIG_ENDIAN. */
uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}
3137
/* address_space_ldl() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
3142
/* address_space_ldl_le() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
3147
/* address_space_ldl_be() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
3152
/* warning: addr must be aligned
 *
 * Load a 64-bit value from @addr in @as using the byte order selected by
 * @endian.  The transaction status is stored in *@result when @result is
 * non-NULL.
 */
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    /* Dispatch through the region if fewer than 8 bytes translated or the
     * region cannot be read directly.
     */
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* Records whether the iothread lock must be released on exit. */
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
        /* Swap only when @endian is the opposite of the target's order. */
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}
3209
/* 64-bit load via address_space_ldq_internal() with DEVICE_NATIVE_ENDIAN. */
uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}
3216
/* 64-bit load via address_space_ldq_internal() with DEVICE_LITTLE_ENDIAN. */
uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}
3223
/* 64-bit load via address_space_ldq_internal() with DEVICE_BIG_ENDIAN. */
uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}
3230
/* address_space_ldq() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
3235
/* address_space_ldq_le() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
3240
/* address_space_ldq_be() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
3245
3246 /* XXX: optimize */
3247 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3248                             MemTxAttrs attrs, MemTxResult *result)
3249 {
3250     uint8_t val;
3251     MemTxResult r;
3252
3253     r = address_space_rw(as, addr, attrs, &val, 1, 0);
3254     if (result) {
3255         *result = r;
3256     }
3257     return val;
3258 }
3259
/* address_space_ldub() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
3264
/* warning: addr must be aligned
 *
 * Load a 16-bit value from @addr in @as using the byte order selected by
 * @endian.  The transaction status is stored in *@result when @result is
 * non-NULL.
 */
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    /* Dispatch through the region if fewer than 2 bytes translated or the
     * region cannot be read directly.
     */
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* Records whether the iothread lock must be released on exit. */
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
        /* Swap only when @endian is the opposite of the target's order. */
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}
3322
/* 16-bit load via address_space_lduw_internal() with DEVICE_NATIVE_ENDIAN. */
uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}
3329
/* 16-bit load via address_space_lduw_internal() with DEVICE_LITTLE_ENDIAN. */
uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}
3336
/* 16-bit load via address_space_lduw_internal() with DEVICE_BIG_ENDIAN. */
uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}
3343
/* address_space_lduw() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
3348
/* address_space_lduw_le() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
3353
/* address_space_lduw_be() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
3358
3359 /* warning: addr must be aligned. The ram page is not masked as dirty
3360    and the code inside is not invalidated. It is useful if the dirty
3361    bits are used to track modified PTEs */
3362 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3363                                 MemTxAttrs attrs, MemTxResult *result)
3364 {
3365     uint8_t *ptr;
3366     MemoryRegion *mr;
3367     hwaddr l = 4;
3368     hwaddr addr1;
3369     MemTxResult r;
3370     uint8_t dirty_log_mask;
3371     bool release_lock = false;
3372
3373     rcu_read_lock();
3374     mr = address_space_translate(as, addr, &addr1, &l,
3375                                  true);
3376     if (l < 4 || !memory_access_is_direct(mr, true)) {
3377         release_lock |= prepare_mmio_access(mr);
3378
3379         r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3380     } else {
3381         ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3382         stl_p(ptr, val);
3383
3384         dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3385         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3386         cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3387                                             4, dirty_log_mask);
3388         r = MEMTX_OK;
3389     }
3390     if (result) {
3391         *result = r;
3392     }
3393     if (release_lock) {
3394         qemu_mutex_unlock_iothread();
3395     }
3396     rcu_read_unlock();
3397 }
3398
/* address_space_stl_notdirty() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
3403
/* warning: addr must be aligned
 *
 * Store the 32-bit @val at @addr in @as using the byte order selected by
 * @endian.  The transaction status is stored in *@result when @result is
 * non-NULL.
 */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    /* Dispatch through the region if fewer than 4 bytes translated or the
     * region cannot be written directly.
     */
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        /* Records whether the iothread lock must be released on exit. */
        release_lock |= prepare_mmio_access(mr);

        /* Swap only when @endian is the opposite of the target's order. */
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}
3459
/* 32-bit store via address_space_stl_internal() with DEVICE_NATIVE_ENDIAN. */
void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}
3466
/* 32-bit store via address_space_stl_internal() with DEVICE_LITTLE_ENDIAN. */
void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}
3473
/* 32-bit store via address_space_stl_internal() with DEVICE_BIG_ENDIAN. */
void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}
3480
/* address_space_stl() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
3485
/* address_space_stl_le() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
3490
/* address_space_stl_be() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
3495
3496 /* XXX: optimize */
3497 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3498                        MemTxAttrs attrs, MemTxResult *result)
3499 {
3500     uint8_t v = val;
3501     MemTxResult r;
3502
3503     r = address_space_rw(as, addr, attrs, &v, 1, 1);
3504     if (result) {
3505         *result = r;
3506     }
3507 }
3508
/* address_space_stb() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
3513
/* warning: addr must be aligned
 *
 * Store the low 16 bits of @val at @addr in @as using the byte order
 * selected by @endian.  The transaction status is stored in *@result when
 * @result is non-NULL.
 */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    /* Dispatch through the region if fewer than 2 bytes translated or the
     * region cannot be written directly.
     */
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        /* Records whether the iothread lock must be released on exit. */
        release_lock |= prepare_mmio_access(mr);

        /* Swap only when @endian is the opposite of the target's order. */
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}
3568
/* 16-bit store via address_space_stw_internal() with DEVICE_NATIVE_ENDIAN. */
void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}
3575
/* 16-bit store via address_space_stw_internal() with DEVICE_LITTLE_ENDIAN. */
void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}
3582
/* 16-bit store via address_space_stw_internal() with DEVICE_BIG_ENDIAN. */
void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}
3589
/* address_space_stw() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
3594
/* address_space_stw_le() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
3599
/* address_space_stw_be() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
3604
3605 /* XXX: optimize */
3606 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3607                        MemTxAttrs attrs, MemTxResult *result)
3608 {
3609     MemTxResult r;
3610     val = tswap64(val);
3611     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3612     if (result) {
3613         *result = r;
3614     }
3615 }
3616
3617 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3618                        MemTxAttrs attrs, MemTxResult *result)
3619 {
3620     MemTxResult r;
3621     val = cpu_to_le64(val);
3622     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3623     if (result) {
3624         *result = r;
3625     }
3626 }
3627 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3628                        MemTxAttrs attrs, MemTxResult *result)
3629 {
3630     MemTxResult r;
3631     val = cpu_to_be64(val);
3632     r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3633     if (result) {
3634         *result = r;
3635     }
3636 }
3637
/* address_space_stq() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
3642
/* address_space_stq_le() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
3647
/* address_space_stq_be() with MEMTXATTRS_UNSPECIFIED; the status is not reported. */
void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
3652
3653 /* virtual memory access for debug (includes writing to ROM) */
3654 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3655                         uint8_t *buf, int len, int is_write)
3656 {
3657     int l;
3658     hwaddr phys_addr;
3659     target_ulong page;
3660
3661     while (len > 0) {
3662         int asidx;
3663         MemTxAttrs attrs;
3664
3665         page = addr & TARGET_PAGE_MASK;
3666         phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3667         asidx = cpu_asidx_from_attrs(cpu, attrs);
3668         /* if no physical page mapped, return an error */
3669         if (phys_addr == -1)
3670             return -1;
3671         l = (page + TARGET_PAGE_SIZE) - addr;
3672         if (l > len)
3673             l = len;
3674         phys_addr += (addr & ~TARGET_PAGE_MASK);
3675         if (is_write) {
3676             cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3677                                           phys_addr, buf, l);
3678         } else {
3679             address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3680                              MEMTXATTRS_UNSPECIFIED,
3681                              buf, l, 0);
3682         }
3683         len -= l;
3684         buf += l;
3685         addr += l;
3686     }
3687     return 0;
3688 }
3689
/*
 * Allows code that needs to deal with migration bitmaps etc to still be built
 * target independent.
 *
 * Returns the compile-time TARGET_PAGE_BITS value.
 */
size_t qemu_target_page_bits(void)
{
    return TARGET_PAGE_BITS;
}
3698
3699 #endif
3700
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * The answer is fixed at build time by TARGET_WORDS_BIGENDIAN.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3714
3715 #ifndef CONFIG_USER_ONLY
3716 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3717 {
3718     MemoryRegion*mr;
3719     hwaddr l = 1;
3720     bool res;
3721
3722     rcu_read_lock();
3723     mr = address_space_translate(&address_space_memory,
3724                                  phys_addr, &phys_addr, &l, false);
3725
3726     res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3727     rcu_read_unlock();
3728     return res;
3729 }
3730
3731 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3732 {
3733     RAMBlock *block;
3734     int ret = 0;
3735
3736     rcu_read_lock();
3737     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3738         ret = func(block->idstr, block->host, block->offset,
3739                    block->used_length, opaque);
3740         if (ret) {
3741             break;
3742         }
3743     }
3744     rcu_read_unlock();
3745     return ret;
3746 }
3747 #endif