[qemu/ar7.git] / exec.c
1 /*
2  * Virtual page mapping
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
57 #include "qemu/range.h"
58 #ifndef _WIN32
59 #include "qemu/mmap-alloc.h"
60 #endif
62 //#define DEBUG_SUBPAGE
64 #if !defined(CONFIG_USER_ONLY)
65 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
66 * are protected by the ramlist lock.
67 */
68 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
70 static MemoryRegion *system_memory;
71 static MemoryRegion *system_io;
73 AddressSpace address_space_io;
74 AddressSpace address_space_memory;
76 MemoryRegion io_mem_rom, io_mem_notdirty;
77 static MemoryRegion io_mem_unassigned;
79 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
80 #define RAM_PREALLOC (1 << 0)
82 /* RAM is mmap-ed with MAP_SHARED */
83 #define RAM_SHARED (1 << 1)
85 /* Only a portion of RAM (used_length) is actually used, and migrated.
86 * This used_length size can change across reboots.
87 */
88 #define RAM_RESIZEABLE (1 << 2)
90 /* RAM is backed by an mmapped file.
91 */
92 #define RAM_FILE (1 << 3)
93 #endif
95 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
96 /* current CPU in the current thread. It is only valid inside
97 cpu_exec() */
98 __thread CPUState *current_cpu;
99 /* 0 = Do not count executed instructions.
100 1 = Precise instruction counting.
101 2 = Adaptive rate instruction counting. */
102 int use_icount;
104 #if !defined(CONFIG_USER_ONLY)
106 typedef struct PhysPageEntry PhysPageEntry;
108 struct PhysPageEntry {
109 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
110 uint32_t skip : 6;
111 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
112 uint32_t ptr : 26;
115 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
117 /* Size of the L2 (and L3, etc) page tables. */
118 #define ADDR_SPACE_BITS 64
120 #define P_L2_BITS 9
121 #define P_L2_SIZE (1 << P_L2_BITS)
123 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
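/* The dispatch structures below form a sparse radix tree over the physical
 * address space: interior nodes are arrays of P_L2_SIZE PhysPageEntry items
 * held in PhysPageMap.nodes, while a leaf's ptr field indexes into
 * PhysPageMap.sections. The skip field lets a lookup jump over levels that
 * contain only a single child (see phys_page_compact below). */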
125 typedef PhysPageEntry Node[P_L2_SIZE];
127 typedef struct PhysPageMap {
128 struct rcu_head rcu;
130 unsigned sections_nb;
131 unsigned sections_nb_alloc;
132 unsigned nodes_nb;
133 unsigned nodes_nb_alloc;
134 Node *nodes;
135 MemoryRegionSection *sections;
136 } PhysPageMap;
138 struct AddressSpaceDispatch {
139 struct rcu_head rcu;
141 /* This is a multi-level map on the physical address space.
142 * The bottom level has pointers to MemoryRegionSections.
143 */
144 PhysPageEntry phys_map;
145 PhysPageMap map;
146 AddressSpace *as;
149 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
150 typedef struct subpage_t {
151 MemoryRegion iomem;
152 AddressSpace *as;
153 hwaddr base;
154 uint16_t sub_section[TARGET_PAGE_SIZE];
155 } subpage_t;
157 #define PHYS_SECTION_UNASSIGNED 0
158 #define PHYS_SECTION_NOTDIRTY 1
159 #define PHYS_SECTION_ROM 2
160 #define PHYS_SECTION_WATCH 3
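/* The four indices above are reserved slots in every PhysPageMap's sections
 * array; mem_begin() registers dummy sections in exactly this order and
 * asserts that they land on these values. */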
162 static void io_mem_init(void);
163 static void memory_map_init(void);
164 static void tcg_commit(MemoryListener *listener);
166 static MemoryRegion io_mem_watch;
168 /**
169  * CPUAddressSpace: all the information a CPU needs about an AddressSpace
170  * @cpu: the CPU whose AddressSpace this is
171  * @as: the AddressSpace itself
172  * @memory_dispatch: its dispatch pointer (cached, RCU protected)
173  * @tcg_as_listener: listener for tracking changes to the AddressSpace
174  */
175 struct CPUAddressSpace {
176 CPUState *cpu;
177 AddressSpace *as;
178 struct AddressSpaceDispatch *memory_dispatch;
179 MemoryListener tcg_as_listener;
182 #endif
184 #if !defined(CONFIG_USER_ONLY)
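/* The helpers below populate the radix tree: phys_map_node_reserve() grows
 * the node array, phys_map_node_alloc() initializes a fresh node, and
 * phys_page_set_level()/phys_page_set() map a range of pages to a section
 * index, descending one level per recursive call. */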
186 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
188 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
189 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
190 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
191 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
195 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
197 unsigned i;
198 uint32_t ret;
199 PhysPageEntry e;
200 PhysPageEntry *p;
202 ret = map->nodes_nb++;
203 p = map->nodes[ret];
204 assert(ret != PHYS_MAP_NODE_NIL);
205 assert(ret != map->nodes_nb_alloc);
207 e.skip = leaf ? 0 : 1;
208 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
209 for (i = 0; i < P_L2_SIZE; ++i) {
210 memcpy(&p[i], &e, sizeof(e));
212 return ret;
215 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
216 hwaddr *index, hwaddr *nb, uint16_t leaf,
217 int level)
219 PhysPageEntry *p;
220 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
222 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
223 lp->ptr = phys_map_node_alloc(map, level == 0);
225 p = map->nodes[lp->ptr];
226 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
228 while (*nb && lp < &p[P_L2_SIZE]) {
229 if ((*index & (step - 1)) == 0 && *nb >= step) {
230 lp->skip = 0;
231 lp->ptr = leaf;
232 *index += step;
233 *nb -= step;
234 } else {
235 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
237 ++lp;
241 static void phys_page_set(AddressSpaceDispatch *d,
242 hwaddr index, hwaddr nb,
243 uint16_t leaf)
245 /* Wildly overreserve - it doesn't matter much. */
246 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
248 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
251 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
252 * and update our entry so we can skip it and go directly to the destination.
253 */
254 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
256 unsigned valid_ptr = P_L2_SIZE;
257 int valid = 0;
258 PhysPageEntry *p;
259 int i;
261 if (lp->ptr == PHYS_MAP_NODE_NIL) {
262 return;
265 p = nodes[lp->ptr];
266 for (i = 0; i < P_L2_SIZE; i++) {
267 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
268 continue;
271 valid_ptr = i;
272 valid++;
273 if (p[i].skip) {
274 phys_page_compact(&p[i], nodes, compacted);
278 /* We can only compress if there's only one child. */
279 if (valid != 1) {
280 return;
283 assert(valid_ptr < P_L2_SIZE);
285 /* Don't compress if it won't fit in the # of bits we have. */
286 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
287 return;
290 lp->ptr = p[valid_ptr].ptr;
291 if (!p[valid_ptr].skip) {
292 /* If our only child is a leaf, make this a leaf. */
293 /* By design, we should have made this node a leaf to begin with so we
294 * should never reach here.
295 * But since it's so simple to handle this, let's do it just in case we
296 * change this rule.
297 */
298 lp->skip = 0;
299 } else {
300 lp->skip += p[valid_ptr].skip;
304 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
306 DECLARE_BITMAP(compacted, nodes_nb);
308 if (d->phys_map.skip) {
309 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
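/* Look up the MemoryRegionSection covering addr: walk the tree from the
 * root, honouring the skip counts; addresses with no mapping resolve to the
 * PHYS_SECTION_UNASSIGNED slot. */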
313 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
314 Node *nodes, MemoryRegionSection *sections)
316 PhysPageEntry *p;
317 hwaddr index = addr >> TARGET_PAGE_BITS;
318 int i;
320 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
321 if (lp.ptr == PHYS_MAP_NODE_NIL) {
322 return &sections[PHYS_SECTION_UNASSIGNED];
324 p = nodes[lp.ptr];
325 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
328 if (sections[lp.ptr].size.hi ||
329 range_covers_byte(sections[lp.ptr].offset_within_address_space,
330 sections[lp.ptr].size.lo, addr)) {
331 return &sections[lp.ptr];
332 } else {
333 return &sections[PHYS_SECTION_UNASSIGNED];
337 bool memory_region_is_unassigned(MemoryRegion *mr)
339 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
340 && mr != &io_mem_watch;
343 /* Called from RCU critical section */
344 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
345 hwaddr addr,
346 bool resolve_subpage)
348 MemoryRegionSection *section;
349 subpage_t *subpage;
351 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
352 if (resolve_subpage && section->mr->subpage) {
353 subpage = container_of(section->mr, subpage_t, iomem);
354 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
356 return section;
359 /* Called from RCU critical section */
360 static MemoryRegionSection *
361 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
362 hwaddr *plen, bool resolve_subpage)
364 MemoryRegionSection *section;
365 MemoryRegion *mr;
366 Int128 diff;
368 section = address_space_lookup_region(d, addr, resolve_subpage);
369 /* Compute offset within MemoryRegionSection */
370 addr -= section->offset_within_address_space;
372 /* Compute offset within MemoryRegion */
373 *xlat = addr + section->offset_within_region;
375 mr = section->mr;
377 /* MMIO registers can be expected to perform full-width accesses based only
378 * on their address, without considering adjacent registers that could
379 * decode to completely different MemoryRegions. When such registers
380 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
381 * regions overlap wildly. For this reason we cannot clamp the accesses
382 * here.
383 *
384 * If the length is small (as is the case for address_space_ldl/stl),
385 * everything works fine. If the incoming length is large, however,
386 * the caller really has to do the clamping through memory_access_size.
387 */
388 if (memory_region_is_ram(mr)) {
389 diff = int128_sub(section->size, int128_make64(addr));
390 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
392 return section;
395 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
397 if (memory_region_is_ram(mr)) {
398 return !(is_write && mr->readonly);
400 if (memory_region_is_romd(mr)) {
401 return !is_write;
404 return false;
407 /* Called from RCU critical section */
408 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
409 hwaddr *xlat, hwaddr *plen,
410 bool is_write)
412 IOMMUTLBEntry iotlb;
413 MemoryRegionSection *section;
414 MemoryRegion *mr;
416 for (;;) {
417 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
418 section = address_space_translate_internal(d, addr, &addr, plen, true);
419 mr = section->mr;
421 if (!mr->iommu_ops) {
422 break;
425 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
426 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
427 | (addr & iotlb.addr_mask));
428 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
429 if (!(iotlb.perm & (1 << is_write))) {
430 mr = &io_mem_unassigned;
431 break;
434 as = iotlb.target_as;
437 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
438 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
439 *plen = MIN(page, *plen);
442 *xlat = addr;
443 return mr;
446 /* Called from RCU critical section */
447 MemoryRegionSection *
448 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
449 hwaddr *xlat, hwaddr *plen)
451 MemoryRegionSection *section;
452 section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
453 addr, xlat, plen, false);
455 assert(!section->mr->iommu_ops);
456 return section;
458 #endif
460 #if !defined(CONFIG_USER_ONLY)
462 static int cpu_common_post_load(void *opaque, int version_id)
464 CPUState *cpu = opaque;
466 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
467 version_id is increased. */
468 cpu->interrupt_request &= ~0x01;
469 tlb_flush(cpu, 1);
471 return 0;
474 static int cpu_common_pre_load(void *opaque)
476 CPUState *cpu = opaque;
478 cpu->exception_index = -1;
480 return 0;
483 static bool cpu_common_exception_index_needed(void *opaque)
485 CPUState *cpu = opaque;
487 return tcg_enabled() && cpu->exception_index != -1;
490 static const VMStateDescription vmstate_cpu_common_exception_index = {
491 .name = "cpu_common/exception_index",
492 .version_id = 1,
493 .minimum_version_id = 1,
494 .needed = cpu_common_exception_index_needed,
495 .fields = (VMStateField[]) {
496 VMSTATE_INT32(exception_index, CPUState),
497 VMSTATE_END_OF_LIST()
501 static bool cpu_common_crash_occurred_needed(void *opaque)
503 CPUState *cpu = opaque;
505 return cpu->crash_occurred;
508 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
509 .name = "cpu_common/crash_occurred",
510 .version_id = 1,
511 .minimum_version_id = 1,
512 .needed = cpu_common_crash_occurred_needed,
513 .fields = (VMStateField[]) {
514 VMSTATE_BOOL(crash_occurred, CPUState),
515 VMSTATE_END_OF_LIST()
519 const VMStateDescription vmstate_cpu_common = {
520 .name = "cpu_common",
521 .version_id = 1,
522 .minimum_version_id = 1,
523 .pre_load = cpu_common_pre_load,
524 .post_load = cpu_common_post_load,
525 .fields = (VMStateField[]) {
526 VMSTATE_UINT32(halted, CPUState),
527 VMSTATE_UINT32(interrupt_request, CPUState),
528 VMSTATE_END_OF_LIST()
530 .subsections = (const VMStateDescription*[]) {
531 &vmstate_cpu_common_exception_index,
532 &vmstate_cpu_common_crash_occurred,
533 NULL
537 #endif
539 CPUState *qemu_get_cpu(int index)
541 CPUState *cpu;
543 CPU_FOREACH(cpu) {
544 if (cpu->cpu_index == index) {
545 return cpu;
549 return NULL;
552 #if !defined(CONFIG_USER_ONLY)
553 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
555 /* We only support one address space per cpu at the moment. */
556 assert(cpu->as == as);
558 if (cpu->cpu_ases) {
559 /* We've already registered the listener for our only AS */
560 return;
563 cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
564 cpu->cpu_ases[0].cpu = cpu;
565 cpu->cpu_ases[0].as = as;
566 cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
567 memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
569 #endif
571 #ifndef CONFIG_USER_ONLY
572 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
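/* In softmmu builds, cpu_index values are handed out from this bitmap, so an
 * index released by cpu_exec_exit() can be reused by a later hotplugged CPU;
 * the user-mode variant further down simply counts the existing CPUs. */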
574 static int cpu_get_free_index(Error **errp)
576 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
578 if (cpu >= MAX_CPUMASK_BITS) {
579 error_setg(errp, "Trying to use more CPUs than max of %d",
580 MAX_CPUMASK_BITS);
581 return -1;
584 bitmap_set(cpu_index_map, cpu, 1);
585 return cpu;
588 void cpu_exec_exit(CPUState *cpu)
590 if (cpu->cpu_index == -1) {
591 /* cpu_index was never allocated by this @cpu or was already freed. */
592 return;
595 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
596 cpu->cpu_index = -1;
598 #else
600 static int cpu_get_free_index(Error **errp)
602 CPUState *some_cpu;
603 int cpu_index = 0;
605 CPU_FOREACH(some_cpu) {
606 cpu_index++;
608 return cpu_index;
611 void cpu_exec_exit(CPUState *cpu)
614 #endif
616 void cpu_exec_init(CPUState *cpu, Error **errp)
618 CPUClass *cc = CPU_GET_CLASS(cpu);
619 int cpu_index;
620 Error *local_err = NULL;
622 #ifndef CONFIG_USER_ONLY
623 cpu->as = &address_space_memory;
624 cpu->thread_id = qemu_get_thread_id();
625 #endif
627 #if defined(CONFIG_USER_ONLY)
628 cpu_list_lock();
629 #endif
630 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
631 if (local_err) {
632 error_propagate(errp, local_err);
633 #if defined(CONFIG_USER_ONLY)
634 cpu_list_unlock();
635 #endif
636 return;
638 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
639 #if defined(CONFIG_USER_ONLY)
640 cpu_list_unlock();
641 #endif
642 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
643 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
645 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
646 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
647 cpu_save, cpu_load, cpu->env_ptr);
648 assert(cc->vmsd == NULL);
649 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
650 #endif
651 if (cc->vmsd != NULL) {
652 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
656 #if defined(CONFIG_USER_ONLY)
657 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
659 tb_invalidate_phys_page_range(pc, pc + 1, 0);
661 #else
662 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
664 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
665 if (phys != -1) {
666 tb_invalidate_phys_addr(cpu->as,
667 phys | (pc & ~TARGET_PAGE_MASK));
670 #endif
672 #if defined(CONFIG_USER_ONLY)
673 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
678 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
679 int flags)
681 return -ENOSYS;
684 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
688 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
689 int flags, CPUWatchpoint **watchpoint)
691 return -ENOSYS;
693 #else
694 /* Add a watchpoint. */
695 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
696 int flags, CPUWatchpoint **watchpoint)
698 CPUWatchpoint *wp;
700 /* forbid ranges which are empty or run off the end of the address space */
701 if (len == 0 || (addr + len - 1) < addr) {
702 error_report("tried to set invalid watchpoint at %"
703 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
704 return -EINVAL;
706 wp = g_malloc(sizeof(*wp));
708 wp->vaddr = addr;
709 wp->len = len;
710 wp->flags = flags;
712 /* keep all GDB-injected watchpoints in front */
713 if (flags & BP_GDB) {
714 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
715 } else {
716 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
719 tlb_flush_page(cpu, addr);
721 if (watchpoint)
722 *watchpoint = wp;
723 return 0;
726 /* Remove a specific watchpoint. */
727 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
728 int flags)
730 CPUWatchpoint *wp;
732 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
733 if (addr == wp->vaddr && len == wp->len
734 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
735 cpu_watchpoint_remove_by_ref(cpu, wp);
736 return 0;
739 return -ENOENT;
742 /* Remove a specific watchpoint by reference. */
743 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
745 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
747 tlb_flush_page(cpu, watchpoint->vaddr);
749 g_free(watchpoint);
752 /* Remove all matching watchpoints. */
753 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
755 CPUWatchpoint *wp, *next;
757 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
758 if (wp->flags & mask) {
759 cpu_watchpoint_remove_by_ref(cpu, wp);
764 /* Return true if this watchpoint address matches the specified
765 * access (ie the address range covered by the watchpoint overlaps
766 * partially or completely with the address range covered by the
767 * access).
768 */
769 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
770 vaddr addr,
771 vaddr len)
773 /* We know the lengths are non-zero, but a little caution is
774 * required to avoid errors in the case where the range ends
775 * exactly at the top of the address space and so addr + len
776 * wraps round to zero.
777 */
778 vaddr wpend = wp->vaddr + wp->len - 1;
779 vaddr addrend = addr + len - 1;
781 return !(addr > wpend || wp->vaddr > addrend);
784 #endif
786 /* Add a breakpoint. */
787 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
788 CPUBreakpoint **breakpoint)
790 CPUBreakpoint *bp;
792 bp = g_malloc(sizeof(*bp));
794 bp->pc = pc;
795 bp->flags = flags;
797 /* keep all GDB-injected breakpoints in front */
798 if (flags & BP_GDB) {
799 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
800 } else {
801 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
804 breakpoint_invalidate(cpu, pc);
806 if (breakpoint) {
807 *breakpoint = bp;
809 return 0;
812 /* Remove a specific breakpoint. */
813 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
815 CPUBreakpoint *bp;
817 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
818 if (bp->pc == pc && bp->flags == flags) {
819 cpu_breakpoint_remove_by_ref(cpu, bp);
820 return 0;
823 return -ENOENT;
826 /* Remove a specific breakpoint by reference. */
827 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
829 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
831 breakpoint_invalidate(cpu, breakpoint->pc);
833 g_free(breakpoint);
836 /* Remove all matching breakpoints. */
837 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
839 CPUBreakpoint *bp, *next;
841 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
842 if (bp->flags & mask) {
843 cpu_breakpoint_remove_by_ref(cpu, bp);
848 /* enable or disable single step mode. EXCP_DEBUG is returned by the
849 CPU loop after each instruction */
850 void cpu_single_step(CPUState *cpu, int enabled)
852 if (cpu->singlestep_enabled != enabled) {
853 cpu->singlestep_enabled = enabled;
854 if (kvm_enabled()) {
855 kvm_update_guest_debug(cpu, 0);
856 } else {
857 /* must flush all the translated code to avoid inconsistencies */
858 /* XXX: only flush what is necessary */
859 tb_flush(cpu);
864 void cpu_abort(CPUState *cpu, const char *fmt, ...)
866 va_list ap;
867 va_list ap2;
869 va_start(ap, fmt);
870 va_copy(ap2, ap);
871 fprintf(stderr, "qemu: fatal: ");
872 vfprintf(stderr, fmt, ap);
873 fprintf(stderr, "\n");
874 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
875 if (qemu_log_enabled()) {
876 qemu_log("qemu: fatal: ");
877 qemu_log_vprintf(fmt, ap2);
878 qemu_log("\n");
879 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
880 qemu_log_flush();
881 qemu_log_close();
883 va_end(ap2);
884 va_end(ap);
885 #if defined(CONFIG_USER_ONLY)
887 struct sigaction act;
888 sigfillset(&act.sa_mask);
889 act.sa_handler = SIG_DFL;
890 sigaction(SIGABRT, &act, NULL);
892 #endif
893 abort();
896 #if !defined(CONFIG_USER_ONLY)
897 /* Called from RCU critical section */
898 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
900 RAMBlock *block;
902 block = atomic_rcu_read(&ram_list.mru_block);
903 if (block && addr - block->offset < block->max_length) {
904 goto found;
906 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
907 if (addr - block->offset < block->max_length) {
908 goto found;
912 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
913 abort();
915 found:
916 /* It is safe to write mru_block outside the iothread lock. This
917 * is what happens:
918 *
919 *     mru_block = xxx
920 *     rcu_read_unlock()
921 *                                       xxx removed from list
922 *     rcu_read_lock()
923 *     read mru_block
924 *                                       mru_block = NULL;
925 *                                       call_rcu(reclaim_ramblock, xxx);
926 *     rcu_read_unlock()
927 *
928 * atomic_rcu_set is not needed here. The block was already published
929 * when it was placed into the list. Here we're just making an extra
930 * copy of the pointer.
931 */
932 ram_list.mru_block = block;
933 return block;
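/* Called after dirty bits for a range have been cleared so that every CPU's
 * TLB re-arms write tracking for those pages; the range must lie within a
 * single RAMBlock (asserted below). */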
936 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
938 CPUState *cpu;
939 ram_addr_t start1;
940 RAMBlock *block;
941 ram_addr_t end;
943 end = TARGET_PAGE_ALIGN(start + length);
944 start &= TARGET_PAGE_MASK;
946 rcu_read_lock();
947 block = qemu_get_ram_block(start);
948 assert(block == qemu_get_ram_block(end - 1));
949 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
950 CPU_FOREACH(cpu) {
951 tlb_reset_dirty(cpu, start1, length);
953 rcu_read_unlock();
956 /* Note: start and end must be within the same ram block. */
957 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
958 ram_addr_t length,
959 unsigned client)
961 unsigned long end, page;
962 bool dirty;
964 if (length == 0) {
965 return false;
968 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
969 page = start >> TARGET_PAGE_BITS;
970 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
971 page, end - page);
973 if (dirty && tcg_enabled()) {
974 tlb_reset_dirty_range_all(start, length);
977 return dirty;
980 /* Called from RCU critical section */
981 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
982 MemoryRegionSection *section,
983 target_ulong vaddr,
984 hwaddr paddr, hwaddr xlat,
985 int prot,
986 target_ulong *address)
988 hwaddr iotlb;
989 CPUWatchpoint *wp;
991 if (memory_region_is_ram(section->mr)) {
992 /* Normal RAM. */
993 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
994 + xlat;
995 if (!section->readonly) {
996 iotlb |= PHYS_SECTION_NOTDIRTY;
997 } else {
998 iotlb |= PHYS_SECTION_ROM;
1000 } else {
1001 AddressSpaceDispatch *d;
1003 d = atomic_rcu_read(&section->address_space->dispatch);
1004 iotlb = section - d->map.sections;
1005 iotlb += xlat;
1008 /* Make accesses to pages with watchpoints go via the
1009 watchpoint trap routines. */
1010 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1011 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1012 /* Avoid trapping reads of pages with a write breakpoint. */
1013 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1014 iotlb = PHYS_SECTION_WATCH + paddr;
1015 *address |= TLB_MMIO;
1016 break;
1021 return iotlb;
1023 #endif /* defined(CONFIG_USER_ONLY) */
1025 #if !defined(CONFIG_USER_ONLY)
1027 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1028 uint16_t section);
1029 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1031 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1032 qemu_anon_ram_alloc;
1034 /*
1035  * Set a custom physical guest memory allocator.
1036  * Accelerators with unusual needs may need this. Hopefully, we can
1037  * get rid of it eventually.
1038  */
1039 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1041 phys_mem_alloc = alloc;
1044 static uint16_t phys_section_add(PhysPageMap *map,
1045 MemoryRegionSection *section)
1047 /* The physical section number is ORed with a page-aligned
1048 * pointer to produce the iotlb entries. Thus it should
1049 * never overflow into the page-aligned value.
1050 */
1051 assert(map->sections_nb < TARGET_PAGE_SIZE);
1053 if (map->sections_nb == map->sections_nb_alloc) {
1054 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1055 map->sections = g_renew(MemoryRegionSection, map->sections,
1056 map->sections_nb_alloc);
1058 map->sections[map->sections_nb] = *section;
1059 memory_region_ref(section->mr);
1060 return map->sections_nb++;
1063 static void phys_section_destroy(MemoryRegion *mr)
1065 memory_region_unref(mr);
1067 if (mr->subpage) {
1068 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1069 object_unref(OBJECT(&subpage->iomem));
1070 g_free(subpage);
1074 static void phys_sections_free(PhysPageMap *map)
1076 while (map->sections_nb > 0) {
1077 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1078 phys_section_destroy(section->mr);
1080 g_free(map->sections);
1081 g_free(map->nodes);
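/* Sections that do not cover a whole target page are folded into a subpage_t:
 * one subpage per page, whose sub_section[] table maps each byte offset inside
 * the page back to a section index (see subpage_register/subpage_init below). */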
1084 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1086 subpage_t *subpage;
1087 hwaddr base = section->offset_within_address_space
1088 & TARGET_PAGE_MASK;
1089 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1090 d->map.nodes, d->map.sections);
1091 MemoryRegionSection subsection = {
1092 .offset_within_address_space = base,
1093 .size = int128_make64(TARGET_PAGE_SIZE),
1095 hwaddr start, end;
1097 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1099 if (!(existing->mr->subpage)) {
1100 subpage = subpage_init(d->as, base);
1101 subsection.address_space = d->as;
1102 subsection.mr = &subpage->iomem;
1103 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1104 phys_section_add(&d->map, &subsection));
1105 } else {
1106 subpage = container_of(existing->mr, subpage_t, iomem);
1108 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1109 end = start + int128_get64(section->size) - 1;
1110 subpage_register(subpage, start, end,
1111 phys_section_add(&d->map, section));
1115 static void register_multipage(AddressSpaceDispatch *d,
1116 MemoryRegionSection *section)
1118 hwaddr start_addr = section->offset_within_address_space;
1119 uint16_t section_index = phys_section_add(&d->map, section);
1120 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1121 TARGET_PAGE_BITS));
1123 assert(num_pages);
1124 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1127 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1129 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1130 AddressSpaceDispatch *d = as->next_dispatch;
1131 MemoryRegionSection now = *section, remain = *section;
1132 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1134 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1135 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1136 - now.offset_within_address_space;
1138 now.size = int128_min(int128_make64(left), now.size);
1139 register_subpage(d, &now);
1140 } else {
1141 now.size = int128_zero();
1143 while (int128_ne(remain.size, now.size)) {
1144 remain.size = int128_sub(remain.size, now.size);
1145 remain.offset_within_address_space += int128_get64(now.size);
1146 remain.offset_within_region += int128_get64(now.size);
1147 now = remain;
1148 if (int128_lt(remain.size, page_size)) {
1149 register_subpage(d, &now);
1150 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1151 now.size = page_size;
1152 register_subpage(d, &now);
1153 } else {
1154 now.size = int128_and(now.size, int128_neg(page_size));
1155 register_multipage(d, &now);
1160 void qemu_flush_coalesced_mmio_buffer(void)
1162 if (kvm_enabled())
1163 kvm_flush_coalesced_mmio_buffer();
1166 void qemu_mutex_lock_ramlist(void)
1168 qemu_mutex_lock(&ram_list.mutex);
1171 void qemu_mutex_unlock_ramlist(void)
1173 qemu_mutex_unlock(&ram_list.mutex);
1176 #ifdef __linux__
1178 #include <sys/vfs.h>
1180 #define HUGETLBFS_MAGIC 0x958458f6
1182 static long gethugepagesize(const char *path, Error **errp)
1184 struct statfs fs;
1185 int ret;
1187 do {
1188 ret = statfs(path, &fs);
1189 } while (ret != 0 && errno == EINTR);
1191 if (ret != 0) {
1192 error_setg_errno(errp, errno, "failed to get page size of file %s",
1193 path);
1194 return 0;
1197 if (fs.f_type != HUGETLBFS_MAGIC)
1198 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1200 return fs.f_bsize;
1203 static void *file_ram_alloc(RAMBlock *block,
1204 ram_addr_t memory,
1205 const char *path,
1206 Error **errp)
1208 struct stat st;
1209 char *filename;
1210 char *sanitized_name;
1211 char *c;
1212 void *area;
1213 int fd;
1214 uint64_t hpagesize;
1215 Error *local_err = NULL;
1217 hpagesize = gethugepagesize(path, &local_err);
1218 if (local_err) {
1219 error_propagate(errp, local_err);
1220 goto error;
1222 block->mr->align = hpagesize;
1224 if (memory < hpagesize) {
1225 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1226 "or larger than huge page size 0x%" PRIx64,
1227 memory, hpagesize);
1228 goto error;
1231 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1232 error_setg(errp,
1233 "host lacks kvm mmu notifiers, -mem-path unsupported");
1234 goto error;
1237 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1238 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1239 sanitized_name = g_strdup(memory_region_name(block->mr));
1240 for (c = sanitized_name; *c != '\0'; c++) {
1241 if (*c == '/') {
1242 *c = '_';
1246 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1247 sanitized_name);
1248 g_free(sanitized_name);
1250 fd = mkstemp(filename);
1251 if (fd >= 0) {
1252 unlink(filename);
1254 g_free(filename);
1255 } else {
1256 fd = open(path, O_RDWR | O_CREAT, 0644);
1259 if (fd < 0) {
1260 error_setg_errno(errp, errno,
1261 "unable to create backing store for hugepages");
1262 goto error;
1265 memory = ROUND_UP(memory, hpagesize);
1267 /*
1268  * ftruncate is not supported by hugetlbfs in older
1269  * hosts, so don't bother bailing out on errors.
1270  * If anything goes wrong with it under other filesystems,
1271  * mmap will fail.
1272  */
1273 if (ftruncate(fd, memory)) {
1274 perror("ftruncate");
1277 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1278 if (area == MAP_FAILED) {
1279 error_setg_errno(errp, errno,
1280 "unable to map backing store for hugepages");
1281 close(fd);
1282 goto error;
1285 if (mem_prealloc) {
1286 os_mem_prealloc(fd, area, memory);
1289 block->fd = fd;
1290 return area;
1292 error:
1293 return NULL;
1295 #endif
1297 /* Called with the ramlist lock held. */
1298 static ram_addr_t find_ram_offset(ram_addr_t size)
1300 RAMBlock *block, *next_block;
1301 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1303 assert(size != 0); /* it would hand out same offset multiple times */
1305 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1306 return 0;
1309 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1310 ram_addr_t end, next = RAM_ADDR_MAX;
1312 end = block->offset + block->max_length;
1314 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1315 if (next_block->offset >= end) {
1316 next = MIN(next, next_block->offset);
1319 if (next - end >= size && next - end < mingap) {
1320 offset = end;
1321 mingap = next - end;
1325 if (offset == RAM_ADDR_MAX) {
1326 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1327 (uint64_t)size);
1328 abort();
1331 return offset;
1334 ram_addr_t last_ram_offset(void)
1336 RAMBlock *block;
1337 ram_addr_t last = 0;
1339 rcu_read_lock();
1340 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1341 last = MAX(last, block->offset + block->max_length);
1343 rcu_read_unlock();
1344 return last;
1347 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1349 int ret;
1351 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1352 if (!machine_dump_guest_core(current_machine)) {
1353 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1354 if (ret) {
1355 perror("qemu_madvise");
1356 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1357 "but dump_guest_core=off specified\n");
1362 /* Called within an RCU critical section, or while the ramlist lock
1363 * is held.
1364 */
1365 static RAMBlock *find_ram_block(ram_addr_t addr)
1367 RAMBlock *block;
1369 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1370 if (block->offset == addr) {
1371 return block;
1375 return NULL;
1378 /* Called with iothread lock held. */
1379 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1381 RAMBlock *new_block, *block;
1383 rcu_read_lock();
1384 new_block = find_ram_block(addr);
1385 assert(new_block);
1386 assert(!new_block->idstr[0]);
1388 if (dev) {
1389 char *id = qdev_get_dev_path(dev);
1390 if (id) {
1391 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1392 g_free(id);
1395 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1397 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1398 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1399 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1400 new_block->idstr);
1401 abort();
1404 rcu_read_unlock();
1407 /* Called with iothread lock held. */
1408 void qemu_ram_unset_idstr(ram_addr_t addr)
1410 RAMBlock *block;
1412 /* FIXME: arch_init.c assumes that this is not called throughout
1413 * migration. Ignore the problem since hot-unplug during migration
1414 * does not work anyway.
1415 */
1417 rcu_read_lock();
1418 block = find_ram_block(addr);
1419 if (block) {
1420 memset(block->idstr, 0, sizeof(block->idstr));
1422 rcu_read_unlock();
1425 static int memory_try_enable_merging(void *addr, size_t len)
1427 if (!machine_mem_merge(current_machine)) {
1428 /* disabled by the user */
1429 return 0;
1432 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1435 /* Only legal before guest might have detected the memory size: e.g. on
1436 * incoming migration, or right after reset.
1437 *
1438 * As the memory core doesn't know how the memory is accessed, it is up to
1439 * the resize callback to update device state and/or add assertions to detect
1440 * misuse, if necessary.
1441 */
1442 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1444 RAMBlock *block = find_ram_block(base);
1446 assert(block);
1448 newsize = TARGET_PAGE_ALIGN(newsize);
1450 if (block->used_length == newsize) {
1451 return 0;
1454 if (!(block->flags & RAM_RESIZEABLE)) {
1455 error_setg_errno(errp, EINVAL,
1456 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1457 " in != 0x" RAM_ADDR_FMT, block->idstr,
1458 newsize, block->used_length);
1459 return -EINVAL;
1462 if (block->max_length < newsize) {
1463 error_setg_errno(errp, EINVAL,
1464 "Length too large: %s: 0x" RAM_ADDR_FMT
1465 " > 0x" RAM_ADDR_FMT, block->idstr,
1466 newsize, block->max_length);
1467 return -EINVAL;
1470 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1471 block->used_length = newsize;
1472 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1473 DIRTY_CLIENTS_ALL);
1474 memory_region_set_size(block->mr, newsize);
1475 if (block->resized) {
1476 block->resized(block->idstr, newsize, block->host);
1478 return 0;
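/* Common tail of RAM allocation: pick an offset in the ram_addr_t space,
 * allocate host memory unless the caller (or Xen) already did, insert the
 * block into the RCU list sorted by decreasing max_length, grow the dirty
 * bitmaps, and mark the new range dirty for all clients. */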
1481 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1483 RAMBlock *block;
1484 RAMBlock *last_block = NULL;
1485 ram_addr_t old_ram_size, new_ram_size;
1487 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1489 qemu_mutex_lock_ramlist();
1490 new_block->offset = find_ram_offset(new_block->max_length);
1492 if (!new_block->host) {
1493 if (xen_enabled()) {
1494 xen_ram_alloc(new_block->offset, new_block->max_length,
1495 new_block->mr);
1496 } else {
1497 new_block->host = phys_mem_alloc(new_block->max_length,
1498 &new_block->mr->align);
1499 if (!new_block->host) {
1500 error_setg_errno(errp, errno,
1501 "cannot set up guest memory '%s'",
1502 memory_region_name(new_block->mr));
1503 qemu_mutex_unlock_ramlist();
1504 return -1;
1506 memory_try_enable_merging(new_block->host, new_block->max_length);
1510 new_ram_size = MAX(old_ram_size,
1511 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1512 if (new_ram_size > old_ram_size) {
1513 migration_bitmap_extend(old_ram_size, new_ram_size);
1515 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1516 * QLIST (which has an RCU-friendly variant) does not have insertion at
1517 * tail, so save the last element in last_block.
1518 */
1519 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1520 last_block = block;
1521 if (block->max_length < new_block->max_length) {
1522 break;
1525 if (block) {
1526 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1527 } else if (last_block) {
1528 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1529 } else { /* list is empty */
1530 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1532 ram_list.mru_block = NULL;
1534 /* Write list before version */
1535 smp_wmb();
1536 ram_list.version++;
1537 qemu_mutex_unlock_ramlist();
1539 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1541 if (new_ram_size > old_ram_size) {
1542 int i;
1544 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1545 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1546 ram_list.dirty_memory[i] =
1547 bitmap_zero_extend(ram_list.dirty_memory[i],
1548 old_ram_size, new_ram_size);
1551 cpu_physical_memory_set_dirty_range(new_block->offset,
1552 new_block->used_length,
1553 DIRTY_CLIENTS_ALL);
1555 if (new_block->host) {
1556 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1557 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1558 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1559 if (kvm_enabled()) {
1560 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1564 return new_block->offset;
1567 #ifdef __linux__
1568 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1569 bool share, const char *mem_path,
1570 Error **errp)
1572 RAMBlock *new_block;
1573 ram_addr_t addr;
1574 Error *local_err = NULL;
1576 if (xen_enabled()) {
1577 error_setg(errp, "-mem-path not supported with Xen");
1578 return -1;
1581 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1582 /*
1583  * file_ram_alloc() needs to allocate just like
1584  * phys_mem_alloc, but we haven't bothered to provide
1585  * a hook there.
1586  */
1587 error_setg(errp,
1588 "-mem-path not supported with this accelerator");
1589 return -1;
1592 size = TARGET_PAGE_ALIGN(size);
1593 new_block = g_malloc0(sizeof(*new_block));
1594 new_block->mr = mr;
1595 new_block->used_length = size;
1596 new_block->max_length = size;
1597 new_block->flags = share ? RAM_SHARED : 0;
1598 new_block->flags |= RAM_FILE;
1599 new_block->host = file_ram_alloc(new_block, size,
1600 mem_path, errp);
1601 if (!new_block->host) {
1602 g_free(new_block);
1603 return -1;
1606 addr = ram_block_add(new_block, &local_err);
1607 if (local_err) {
1608 g_free(new_block);
1609 error_propagate(errp, local_err);
1610 return -1;
1612 return addr;
1614 #endif
1616 static
1617 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1618 void (*resized)(const char*,
1619 uint64_t length,
1620 void *host),
1621 void *host, bool resizeable,
1622 MemoryRegion *mr, Error **errp)
1624 RAMBlock *new_block;
1625 ram_addr_t addr;
1626 Error *local_err = NULL;
1628 size = TARGET_PAGE_ALIGN(size);
1629 max_size = TARGET_PAGE_ALIGN(max_size);
1630 new_block = g_malloc0(sizeof(*new_block));
1631 new_block->mr = mr;
1632 new_block->resized = resized;
1633 new_block->used_length = size;
1634 new_block->max_length = max_size;
1635 assert(max_size >= size);
1636 new_block->fd = -1;
1637 new_block->host = host;
1638 if (host) {
1639 new_block->flags |= RAM_PREALLOC;
1641 if (resizeable) {
1642 new_block->flags |= RAM_RESIZEABLE;
1644 addr = ram_block_add(new_block, &local_err);
1645 if (local_err) {
1646 g_free(new_block);
1647 error_propagate(errp, local_err);
1648 return -1;
1650 return addr;
1653 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1654 MemoryRegion *mr, Error **errp)
1656 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1659 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1661 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1664 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1665 void (*resized)(const char*,
1666 uint64_t length,
1667 void *host),
1668 MemoryRegion *mr, Error **errp)
1670 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1673 void qemu_ram_free_from_ptr(ram_addr_t addr)
1675 RAMBlock *block;
1677 qemu_mutex_lock_ramlist();
1678 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1679 if (addr == block->offset) {
1680 QLIST_REMOVE_RCU(block, next);
1681 ram_list.mru_block = NULL;
1682 /* Write list before version */
1683 smp_wmb();
1684 ram_list.version++;
1685 g_free_rcu(block, rcu);
1686 break;
1689 qemu_mutex_unlock_ramlist();
1692 static void reclaim_ramblock(RAMBlock *block)
1694 if (block->flags & RAM_PREALLOC) {
1696 } else if (xen_enabled()) {
1697 xen_invalidate_map_cache_entry(block->host);
1698 #ifndef _WIN32
1699 } else if (block->fd >= 0) {
1700 if (block->flags & RAM_FILE) {
1701 qemu_ram_munmap(block->host, block->max_length);
1702 } else {
1703 munmap(block->host, block->max_length);
1705 close(block->fd);
1706 #endif
1707 } else {
1708 qemu_anon_ram_free(block->host, block->max_length);
1710 g_free(block);
1713 void qemu_ram_free(ram_addr_t addr)
1715 RAMBlock *block;
1717 qemu_mutex_lock_ramlist();
1718 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1719 if (addr == block->offset) {
1720 QLIST_REMOVE_RCU(block, next);
1721 ram_list.mru_block = NULL;
1722 /* Write list before version */
1723 smp_wmb();
1724 ram_list.version++;
1725 call_rcu(block, reclaim_ramblock, rcu);
1726 break;
1729 qemu_mutex_unlock_ramlist();
1732 #ifndef _WIN32
1733 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1735 RAMBlock *block;
1736 ram_addr_t offset;
1737 int flags;
1738 void *area, *vaddr;
1740 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1741 offset = addr - block->offset;
1742 if (offset < block->max_length) {
1743 vaddr = ramblock_ptr(block, offset);
1744 if (block->flags & RAM_PREALLOC) {
1746 } else if (xen_enabled()) {
1747 abort();
1748 } else {
1749 flags = MAP_FIXED;
1750 if (block->fd >= 0) {
1751 flags |= (block->flags & RAM_SHARED ?
1752 MAP_SHARED : MAP_PRIVATE);
1753 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1754 flags, block->fd, offset);
1755 } else {
1756 /*
1757  * Remap needs to match alloc. Accelerators that
1758  * set phys_mem_alloc never remap. If they did,
1759  * we'd need a remap hook here.
1760  */
1761 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1763 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1764 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1765 flags, -1, 0);
1767 if (area != vaddr) {
1768 fprintf(stderr, "Could not remap addr: "
1769 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1770 length, addr);
1771 exit(1);
1773 memory_try_enable_merging(vaddr, length);
1774 qemu_ram_setup_dump(vaddr, length);
1779 #endif /* !_WIN32 */
1781 int qemu_get_ram_fd(ram_addr_t addr)
1783 RAMBlock *block;
1784 int fd;
1786 rcu_read_lock();
1787 block = qemu_get_ram_block(addr);
1788 fd = block->fd;
1789 rcu_read_unlock();
1790 return fd;
1793 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1795 RAMBlock *block;
1796 void *ptr;
1798 rcu_read_lock();
1799 block = qemu_get_ram_block(addr);
1800 ptr = ramblock_ptr(block, 0);
1801 rcu_read_unlock();
1802 return ptr;
1805 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1806 * This should not be used for general purpose DMA. Use address_space_map
1807 * or address_space_rw instead. For local memory (e.g. video ram) that the
1808 * device owns, use memory_region_get_ram_ptr.
1809 *
1810 * By the time this function returns, the returned pointer is not protected
1811 * by RCU anymore. If the caller is not within an RCU critical section and
1812 * does not hold the iothread lock, it must have other means of protecting the
1813 * pointer, such as a reference to the region that includes the incoming
1814 * ram_addr_t.
1815 */
1816 void *qemu_get_ram_ptr(ram_addr_t addr)
1818 RAMBlock *block;
1819 void *ptr;
1821 rcu_read_lock();
1822 block = qemu_get_ram_block(addr);
1824 if (xen_enabled() && block->host == NULL) {
1825 /* We need to check if the requested address is in the RAM
1826 * because we don't want to map the entire memory in QEMU.
1827 * In that case just map until the end of the page.
1828 */
1829 if (block->offset == 0) {
1830 ptr = xen_map_cache(addr, 0, 0);
1831 goto unlock;
1834 block->host = xen_map_cache(block->offset, block->max_length, 1);
1836 ptr = ramblock_ptr(block, addr - block->offset);
1838 unlock:
1839 rcu_read_unlock();
1840 return ptr;
1843 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1844 * but takes a size argument.
1845 *
1846 * By the time this function returns, the returned pointer is not protected
1847 * by RCU anymore. If the caller is not within an RCU critical section and
1848 * does not hold the iothread lock, it must have other means of protecting the
1849 * pointer, such as a reference to the region that includes the incoming
1850 * ram_addr_t.
1851 */
1852 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1854 void *ptr;
1855 if (*size == 0) {
1856 return NULL;
1858 if (xen_enabled()) {
1859 return xen_map_cache(addr, *size, 1);
1860 } else {
1861 RAMBlock *block;
1862 rcu_read_lock();
1863 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1864 if (addr - block->offset < block->max_length) {
1865 if (addr - block->offset + *size > block->max_length)
1866 *size = block->max_length - addr + block->offset;
1867 ptr = ramblock_ptr(block, addr - block->offset);
1868 rcu_read_unlock();
1869 return ptr;
1873 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1874 abort();
1878 /* Some of the softmmu routines need to translate from a host pointer
1879 * (typically a TLB entry) back to a ram offset.
1880 *
1881 * By the time this function returns, the returned pointer is not protected
1882 * by RCU anymore. If the caller is not within an RCU critical section and
1883 * does not hold the iothread lock, it must have other means of protecting the
1884 * pointer, such as a reference to the region that includes the incoming
1885 * ram_addr_t.
1886 */
1887 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1889 RAMBlock *block;
1890 uint8_t *host = ptr;
1891 MemoryRegion *mr;
1893 if (xen_enabled()) {
1894 rcu_read_lock();
1895 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1896 mr = qemu_get_ram_block(*ram_addr)->mr;
1897 rcu_read_unlock();
1898 return mr;
1901 rcu_read_lock();
1902 block = atomic_rcu_read(&ram_list.mru_block);
1903 if (block && block->host && host - block->host < block->max_length) {
1904 goto found;
1907 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1908 /* This case happens when the block is not mapped. */
1909 if (block->host == NULL) {
1910 continue;
1912 if (host - block->host < block->max_length) {
1913 goto found;
1917 rcu_read_unlock();
1918 return NULL;
1920 found:
1921 *ram_addr = block->offset + (host - block->host);
1922 mr = block->mr;
1923 rcu_read_unlock();
1924 return mr;
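/* notdirty_mem_ops backs the io_mem_notdirty region: writes to pages that
 * still contain translated code are routed here so the affected TBs can be
 * invalidated before the write is performed and the page is marked dirty. */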
1927 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1928 uint64_t val, unsigned size)
1930 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1931 tb_invalidate_phys_page_fast(ram_addr, size);
1933 switch (size) {
1934 case 1:
1935 stb_p(qemu_get_ram_ptr(ram_addr), val);
1936 break;
1937 case 2:
1938 stw_p(qemu_get_ram_ptr(ram_addr), val);
1939 break;
1940 case 4:
1941 stl_p(qemu_get_ram_ptr(ram_addr), val);
1942 break;
1943 default:
1944 abort();
1946 /* Set both VGA and migration bits for simplicity and to remove
1947 * the notdirty callback faster.
1948 */
1949 cpu_physical_memory_set_dirty_range(ram_addr, size,
1950 DIRTY_CLIENTS_NOCODE);
1951 /* we remove the notdirty callback only if the code has been
1952 flushed */
1953 if (!cpu_physical_memory_is_clean(ram_addr)) {
1954 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
1958 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1959 unsigned size, bool is_write)
1961 return is_write;
1964 static const MemoryRegionOps notdirty_mem_ops = {
1965 .write = notdirty_mem_write,
1966 .valid.accepts = notdirty_mem_accepts,
1967 .endianness = DEVICE_NATIVE_ENDIAN,
1970 /* Generate a debug exception if a watchpoint has been hit. */
1971 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1973 CPUState *cpu = current_cpu;
1974 CPUArchState *env = cpu->env_ptr;
1975 target_ulong pc, cs_base;
1976 target_ulong vaddr;
1977 CPUWatchpoint *wp;
1978 int cpu_flags;
1980 if (cpu->watchpoint_hit) {
1981 /* We re-entered the check after replacing the TB. Now raise
1982 * the debug interrupt so that it will trigger after the
1983 * current instruction. */
1984 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1985 return;
1987 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1988 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1989 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1990 && (wp->flags & flags)) {
1991 if (flags == BP_MEM_READ) {
1992 wp->flags |= BP_WATCHPOINT_HIT_READ;
1993 } else {
1994 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1996 wp->hitaddr = vaddr;
1997 wp->hitattrs = attrs;
1998 if (!cpu->watchpoint_hit) {
1999 cpu->watchpoint_hit = wp;
2000 tb_check_watchpoint(cpu);
2001 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2002 cpu->exception_index = EXCP_DEBUG;
2003 cpu_loop_exit(cpu);
2004 } else {
2005 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2006 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2007 cpu_resume_from_signal(cpu, NULL);
2010 } else {
2011 wp->flags &= ~BP_WATCHPOINT_HIT;
2016 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2017 so these check for a hit then pass through to the normal out-of-line
2018 phys routines. */
2019 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2020 unsigned size, MemTxAttrs attrs)
2022 MemTxResult res;
2023 uint64_t data;
2025 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2026 switch (size) {
2027 case 1:
2028 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2029 break;
2030 case 2:
2031 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2032 break;
2033 case 4:
2034 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2035 break;
2036 default: abort();
2038 *pdata = data;
2039 return res;
2042 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2043 uint64_t val, unsigned size,
2044 MemTxAttrs attrs)
2046 MemTxResult res;
2048 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2049 switch (size) {
2050 case 1:
2051 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2052 break;
2053 case 2:
2054 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2055 break;
2056 case 4:
2057 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2058 break;
2059 default: abort();
2061 return res;
2064 static const MemoryRegionOps watch_mem_ops = {
2065 .read_with_attrs = watch_mem_read,
2066 .write_with_attrs = watch_mem_write,
2067 .endianness = DEVICE_NATIVE_ENDIAN,
2070 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2071 unsigned len, MemTxAttrs attrs)
2073 subpage_t *subpage = opaque;
2074 uint8_t buf[8];
2075 MemTxResult res;
2077 #if defined(DEBUG_SUBPAGE)
2078 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2079 subpage, len, addr);
2080 #endif
2081 res = address_space_read(subpage->as, addr + subpage->base,
2082 attrs, buf, len);
2083 if (res) {
2084 return res;
2086 switch (len) {
2087 case 1:
2088 *data = ldub_p(buf);
2089 return MEMTX_OK;
2090 case 2:
2091 *data = lduw_p(buf);
2092 return MEMTX_OK;
2093 case 4:
2094 *data = ldl_p(buf);
2095 return MEMTX_OK;
2096 case 8:
2097 *data = ldq_p(buf);
2098 return MEMTX_OK;
2099 default:
2100 abort();
2104 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2105 uint64_t value, unsigned len, MemTxAttrs attrs)
2107 subpage_t *subpage = opaque;
2108 uint8_t buf[8];
2110 #if defined(DEBUG_SUBPAGE)
2111 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2112 " value %"PRIx64"\n",
2113 __func__, subpage, len, addr, value);
2114 #endif
2115 switch (len) {
2116 case 1:
2117 stb_p(buf, value);
2118 break;
2119 case 2:
2120 stw_p(buf, value);
2121 break;
2122 case 4:
2123 stl_p(buf, value);
2124 break;
2125 case 8:
2126 stq_p(buf, value);
2127 break;
2128 default:
2129 abort();
2131 return address_space_write(subpage->as, addr + subpage->base,
2132 attrs, buf, len);
2135 static bool subpage_accepts(void *opaque, hwaddr addr,
2136 unsigned len, bool is_write)
2138 subpage_t *subpage = opaque;
2139 #if defined(DEBUG_SUBPAGE)
2140 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2141 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2142 #endif
2144 return address_space_access_valid(subpage->as, addr + subpage->base,
2145 len, is_write);
2148 static const MemoryRegionOps subpage_ops = {
2149 .read_with_attrs = subpage_read,
2150 .write_with_attrs = subpage_write,
2151 .impl.min_access_size = 1,
2152 .impl.max_access_size = 8,
2153 .valid.min_access_size = 1,
2154 .valid.max_access_size = 8,
2155 .valid.accepts = subpage_accepts,
2156 .endianness = DEVICE_NATIVE_ENDIAN,
2159 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2160 uint16_t section)
2162 int idx, eidx;
2164 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2165 return -1;
2166 idx = SUBPAGE_IDX(start);
2167 eidx = SUBPAGE_IDX(end);
2168 #if defined(DEBUG_SUBPAGE)
2169 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2170 __func__, mmio, start, end, idx, eidx, section);
2171 #endif
2172 for (; idx <= eidx; idx++) {
2173 mmio->sub_section[idx] = section;
2176 return 0;
2179 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2181 subpage_t *mmio;
2183 mmio = g_malloc0(sizeof(subpage_t));
2185 mmio->as = as;
2186 mmio->base = base;
2187 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2188 NULL, TARGET_PAGE_SIZE);
2189 mmio->iomem.subpage = true;
2190 #if defined(DEBUG_SUBPAGE)
2191 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2192 mmio, base, TARGET_PAGE_SIZE);
2193 #endif
2194 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2196 return mmio;
2199 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2200 MemoryRegion *mr)
2202 assert(as);
2203 MemoryRegionSection section = {
2204 .address_space = as,
2205 .mr = mr,
2206 .offset_within_address_space = 0,
2207 .offset_within_region = 0,
2208 .size = int128_2_64(),
2211 return phys_section_add(map, &section);
2214 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2216 CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2217 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2218 MemoryRegionSection *sections = d->map.sections;
2220 return sections[index & ~TARGET_PAGE_MASK].mr;
2223 static void io_mem_init(void)
2225 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2226 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2227 NULL, UINT64_MAX);
2228 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2229 NULL, UINT64_MAX);
2230 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2231 NULL, UINT64_MAX);
2234 static void mem_begin(MemoryListener *listener)
2236 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2237 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2238 uint16_t n;
2240 n = dummy_section(&d->map, as, &io_mem_unassigned);
2241 assert(n == PHYS_SECTION_UNASSIGNED);
2242 n = dummy_section(&d->map, as, &io_mem_notdirty);
2243 assert(n == PHYS_SECTION_NOTDIRTY);
2244 n = dummy_section(&d->map, as, &io_mem_rom);
2245 assert(n == PHYS_SECTION_ROM);
2246 n = dummy_section(&d->map, as, &io_mem_watch);
2247 assert(n == PHYS_SECTION_WATCH);
2249 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2250 d->as = as;
2251 as->next_dispatch = d;
2254 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2256 phys_sections_free(&d->map);
2257 g_free(d);
2260 static void mem_commit(MemoryListener *listener)
2262 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2263 AddressSpaceDispatch *cur = as->dispatch;
2264 AddressSpaceDispatch *next = as->next_dispatch;
2266 phys_page_compact_all(next, next->map.nodes_nb);
2268 atomic_rcu_set(&as->dispatch, next);
2269 if (cur) {
2270 call_rcu(cur, address_space_dispatch_free, rcu);
2274 static void tcg_commit(MemoryListener *listener)
2276 CPUAddressSpace *cpuas;
2277 AddressSpaceDispatch *d;
2279     /* Since each CPU stores RAM addresses in its TLB cache, we must
2280        reset the modified entries. */
2281 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2282 cpu_reloading_memory_map();
2283 /* The CPU and TLB are protected by the iothread lock.
2284 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2285      * may have split the RCU critical section.
2286      */
2287 d = atomic_rcu_read(&cpuas->as->dispatch);
2288 cpuas->memory_dispatch = d;
2289 tlb_flush(cpuas->cpu, 1);
2292 void address_space_init_dispatch(AddressSpace *as)
2294 as->dispatch = NULL;
2295 as->dispatch_listener = (MemoryListener) {
2296 .begin = mem_begin,
2297 .commit = mem_commit,
2298 .region_add = mem_add,
2299 .region_nop = mem_add,
2300 .priority = 0,
2302 memory_listener_register(&as->dispatch_listener, as);
2305 void address_space_unregister(AddressSpace *as)
2307 memory_listener_unregister(&as->dispatch_listener);
2310 void address_space_destroy_dispatch(AddressSpace *as)
2312 AddressSpaceDispatch *d = as->dispatch;
2314 atomic_rcu_set(&as->dispatch, NULL);
2315 if (d) {
2316 call_rcu(d, address_space_dispatch_free, rcu);
2320 static void memory_map_init(void)
2322 system_memory = g_malloc(sizeof(*system_memory));
2324 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2325 address_space_init(&address_space_memory, system_memory, "memory");
2327 system_io = g_malloc(sizeof(*system_io));
2328 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2329 65536);
2330 address_space_init(&address_space_io, system_io, "I/O");
2333 MemoryRegion *get_system_memory(void)
2335 return system_memory;
2338 MemoryRegion *get_system_io(void)
2340 return system_io;
2343 #endif /* !defined(CONFIG_USER_ONLY) */
2345 /* physical memory access (slow version, mainly for debug) */
2346 #if defined(CONFIG_USER_ONLY)
2347 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2348 uint8_t *buf, int len, int is_write)
2350 int l, flags;
2351 target_ulong page;
2352 void * p;
2354 while (len > 0) {
2355 page = addr & TARGET_PAGE_MASK;
2356 l = (page + TARGET_PAGE_SIZE) - addr;
2357 if (l > len)
2358 l = len;
2359 flags = page_get_flags(page);
2360 if (!(flags & PAGE_VALID))
2361 return -1;
2362 if (is_write) {
2363 if (!(flags & PAGE_WRITE))
2364 return -1;
2365 /* XXX: this code should not depend on lock_user */
2366 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2367 return -1;
2368 memcpy(p, buf, l);
2369 unlock_user(p, addr, l);
2370 } else {
2371 if (!(flags & PAGE_READ))
2372 return -1;
2373 /* XXX: this code should not depend on lock_user */
2374 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2375 return -1;
2376 memcpy(buf, p, l);
2377 unlock_user(p, addr, 0);
2379 len -= l;
2380 buf += l;
2381 addr += l;
2383 return 0;
2386 #else
2388 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2389 hwaddr length)
2391 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2392 /* No early return if dirty_log_mask is or becomes 0, because
2393 * cpu_physical_memory_set_dirty_range will still call
2394      * xen_modified_memory.
2395      */
2396 if (dirty_log_mask) {
2397 dirty_log_mask =
2398 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2400 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2401 tb_invalidate_phys_range(addr, addr + length);
2402 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2404 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2407 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2409 unsigned access_size_max = mr->ops->valid.max_access_size;
2411 /* Regions are assumed to support 1-4 byte accesses unless
2412 otherwise specified. */
2413 if (access_size_max == 0) {
2414 access_size_max = 4;
2417 /* Bound the maximum access by the alignment of the address. */
2418 if (!mr->ops->impl.unaligned) {
2419 unsigned align_size_max = addr & -addr;
2420 if (align_size_max != 0 && align_size_max < access_size_max) {
2421 access_size_max = align_size_max;
2425 /* Don't attempt accesses larger than the maximum. */
2426 if (l > access_size_max) {
2427 l = access_size_max;
2429 l = pow2floor(l);
2431 return l;
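/* Worked example (illustrative, assuming impl.unaligned is false): for a
 * region with valid.max_access_size == 4, a 7-byte request at address 0x1002
 * starts with a maximum of 4, which the address alignment (0x1002 & -0x1002
 * == 2) lowers to 2; the requested length is then clipped to 2 and
 * pow2floor() keeps it there, so the slow path issues a 16-bit access.
 */
/* Called by the slow-path accessors while they hold the RCU read lock.
 * Takes the iothread lock when the region requires global locking and the
 * lock is not already held; returns true if the caller must drop the lock
 * again after the access.  Coalesced MMIO is also flushed here for regions
 * that request it.
 */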
2434 static bool prepare_mmio_access(MemoryRegion *mr)
2436 bool unlocked = !qemu_mutex_iothread_locked();
2437 bool release_lock = false;
2439 if (unlocked && mr->global_locking) {
2440 qemu_mutex_lock_iothread();
2441 unlocked = false;
2442 release_lock = true;
2444 if (mr->flush_coalesced_mmio) {
2445 if (unlocked) {
2446 qemu_mutex_lock_iothread();
2448 qemu_flush_coalesced_mmio_buffer();
2449 if (unlocked) {
2450 qemu_mutex_unlock_iothread();
2454 return release_lock;
2457 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2458 uint8_t *buf, int len, bool is_write)
2460 hwaddr l;
2461 uint8_t *ptr;
2462 uint64_t val;
2463 hwaddr addr1;
2464 MemoryRegion *mr;
2465 MemTxResult result = MEMTX_OK;
2466 bool release_lock = false;
2468 rcu_read_lock();
2469 while (len > 0) {
2470 l = len;
2471 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2473 if (is_write) {
2474 if (!memory_access_is_direct(mr, is_write)) {
2475 release_lock |= prepare_mmio_access(mr);
2476 l = memory_access_size(mr, l, addr1);
2477 /* XXX: could force current_cpu to NULL to avoid
2478 potential bugs */
2479 switch (l) {
2480 case 8:
2481 /* 64 bit write access */
2482 val = ldq_p(buf);
2483 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2484 attrs);
2485 break;
2486 case 4:
2487 /* 32 bit write access */
2488 val = ldl_p(buf);
2489 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2490 attrs);
2491 break;
2492 case 2:
2493 /* 16 bit write access */
2494 val = lduw_p(buf);
2495 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2496 attrs);
2497 break;
2498 case 1:
2499 /* 8 bit write access */
2500 val = ldub_p(buf);
2501 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2502 attrs);
2503 break;
2504 default:
2505 abort();
2507 } else {
2508 addr1 += memory_region_get_ram_addr(mr);
2509 /* RAM case */
2510 ptr = qemu_get_ram_ptr(addr1);
2511 memcpy(ptr, buf, l);
2512 invalidate_and_set_dirty(mr, addr1, l);
2514 } else {
2515 if (!memory_access_is_direct(mr, is_write)) {
2516 /* I/O case */
2517 release_lock |= prepare_mmio_access(mr);
2518 l = memory_access_size(mr, l, addr1);
2519 switch (l) {
2520 case 8:
2521 /* 64 bit read access */
2522 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2523 attrs);
2524 stq_p(buf, val);
2525 break;
2526 case 4:
2527 /* 32 bit read access */
2528 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2529 attrs);
2530 stl_p(buf, val);
2531 break;
2532 case 2:
2533 /* 16 bit read access */
2534 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2535 attrs);
2536 stw_p(buf, val);
2537 break;
2538 case 1:
2539 /* 8 bit read access */
2540 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2541 attrs);
2542 stb_p(buf, val);
2543 break;
2544 default:
2545 abort();
2547 } else {
2548 /* RAM case */
2549 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2550 memcpy(buf, ptr, l);
2554 if (release_lock) {
2555 qemu_mutex_unlock_iothread();
2556 release_lock = false;
2559 len -= l;
2560 buf += l;
2561 addr += l;
2563 rcu_read_unlock();
2565 return result;
2568 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2569 const uint8_t *buf, int len)
2571 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2574 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2575 uint8_t *buf, int len)
2577 return address_space_rw(as, addr, attrs, buf, len, false);
2581 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2582 int len, int is_write)
2584 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2585 buf, len, is_write);
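/* Usage sketch (illustrative, not part of the upstream file): reading and
 * writing guest physical memory through the accessors above.  The address
 * 0x1000 and the buffer size are arbitrary example values.
 */
static void example_phys_rw(void)
{
    uint8_t buf[16];
    MemTxResult res;

    /* Read 16 bytes from guest-physical address 0x1000 of the system
     * address space and check the transaction result. */
    res = address_space_read(&address_space_memory, 0x1000,
                             MEMTXATTRS_UNSPECIFIED, buf, sizeof(buf));
    if (res != MEMTX_OK) {
        /* The access hit unassigned memory or a device reported an error. */
    }

    /* Write the buffer back; this legacy wrapper discards the MemTxResult. */
    cpu_physical_memory_rw(0x1000, buf, sizeof(buf), 1);
}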
2588 enum write_rom_type {
2589 WRITE_DATA,
2590 FLUSH_CACHE,
2593 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2594 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2596 hwaddr l;
2597 uint8_t *ptr;
2598 hwaddr addr1;
2599 MemoryRegion *mr;
2601 rcu_read_lock();
2602 while (len > 0) {
2603 l = len;
2604 mr = address_space_translate(as, addr, &addr1, &l, true);
2606 if (!(memory_region_is_ram(mr) ||
2607 memory_region_is_romd(mr))) {
2608 l = memory_access_size(mr, l, addr1);
2609 } else {
2610 addr1 += memory_region_get_ram_addr(mr);
2611 /* ROM/RAM case */
2612 ptr = qemu_get_ram_ptr(addr1);
2613 switch (type) {
2614 case WRITE_DATA:
2615 memcpy(ptr, buf, l);
2616 invalidate_and_set_dirty(mr, addr1, l);
2617 break;
2618 case FLUSH_CACHE:
2619 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2620 break;
2623 len -= l;
2624 buf += l;
2625 addr += l;
2627 rcu_read_unlock();
2630 /* Used for ROM loading: can write to both RAM and ROM. */
2631 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2632 const uint8_t *buf, int len)
2634 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2637 void cpu_flush_icache_range(hwaddr start, int len)
2638 {
2639     /*
2640      * This function should do the same thing as an icache flush that was
2641      * triggered from within the guest. For TCG we are always cache coherent,
2642      * so there is no need to flush anything. For KVM / Xen we need to flush
2643      * the host's instruction cache at least.
2644      */
2645 if (tcg_enabled()) {
2646 return;
2649 cpu_physical_memory_write_rom_internal(&address_space_memory,
2650 start, NULL, len, FLUSH_CACHE);
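/* Usage sketch (illustrative): a board-level ROM loader copying an image into
 * guest memory.  cpu_physical_memory_write_rom() also succeeds for ROM/romd
 * regions and invalidates any translated code in the written range;
 * example_load_rom_image() and its arguments are made-up names.
 */
static void example_load_rom_image(const uint8_t *image, int size, hwaddr dest)
{
    cpu_physical_memory_write_rom(&address_space_memory, dest, image, size);
    /* If the guest may execute this code right away, make sure the host
     * instruction cache is coherent too (a no-op under TCG). */
    cpu_flush_icache_range(dest, size);
}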
2653 typedef struct {
2654 MemoryRegion *mr;
2655 void *buffer;
2656 hwaddr addr;
2657 hwaddr len;
2658 bool in_use;
2659 } BounceBuffer;
2661 static BounceBuffer bounce;
2663 typedef struct MapClient {
2664 QEMUBH *bh;
2665 QLIST_ENTRY(MapClient) link;
2666 } MapClient;
2668 QemuMutex map_client_list_lock;
2669 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2670 = QLIST_HEAD_INITIALIZER(map_client_list);
2672 static void cpu_unregister_map_client_do(MapClient *client)
2674 QLIST_REMOVE(client, link);
2675 g_free(client);
2678 static void cpu_notify_map_clients_locked(void)
2680 MapClient *client;
2682 while (!QLIST_EMPTY(&map_client_list)) {
2683 client = QLIST_FIRST(&map_client_list);
2684 qemu_bh_schedule(client->bh);
2685 cpu_unregister_map_client_do(client);
2689 void cpu_register_map_client(QEMUBH *bh)
2691 MapClient *client = g_malloc(sizeof(*client));
2693 qemu_mutex_lock(&map_client_list_lock);
2694 client->bh = bh;
2695 QLIST_INSERT_HEAD(&map_client_list, client, link);
2696 if (!atomic_read(&bounce.in_use)) {
2697 cpu_notify_map_clients_locked();
2699 qemu_mutex_unlock(&map_client_list_lock);
2702 void cpu_exec_init_all(void)
2704 qemu_mutex_init(&ram_list.mutex);
2705 io_mem_init();
2706 memory_map_init();
2707 qemu_mutex_init(&map_client_list_lock);
2710 void cpu_unregister_map_client(QEMUBH *bh)
2712 MapClient *client;
2714 qemu_mutex_lock(&map_client_list_lock);
2715 QLIST_FOREACH(client, &map_client_list, link) {
2716 if (client->bh == bh) {
2717 cpu_unregister_map_client_do(client);
2718 break;
2721 qemu_mutex_unlock(&map_client_list_lock);
2724 static void cpu_notify_map_clients(void)
2726 qemu_mutex_lock(&map_client_list_lock);
2727 cpu_notify_map_clients_locked();
2728 qemu_mutex_unlock(&map_client_list_lock);
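/* Usage sketch (illustrative): coping with address_space_map() failure by
 * registering a bottom half that retries once the bounce buffer is free.
 * 'retry_bh' stands for a QEMUBH the caller created with qemu_bh_new().
 */
static void example_map_or_wait(AddressSpace *as, hwaddr addr, hwaddr *plen,
                                QEMUBH *retry_bh)
{
    void *p = address_space_map(as, addr, plen, false);

    if (!p) {
        /* The single bounce buffer is busy: ask to be notified (the client
         * is removed again automatically once its BH has been scheduled). */
        cpu_register_map_client(retry_bh);
        return;
    }
    /* ... use the mapping, then address_space_unmap() it ... */
}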
2731 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2733 MemoryRegion *mr;
2734 hwaddr l, xlat;
2736 rcu_read_lock();
2737 while (len > 0) {
2738 l = len;
2739 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2740 if (!memory_access_is_direct(mr, is_write)) {
2741 l = memory_access_size(mr, l, addr);
2742 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2743 return false;
2747 len -= l;
2748 addr += l;
2750 rcu_read_unlock();
2751 return true;
2754 /* Map a physical memory region into a host virtual address.
2755 * May map a subset of the requested range, given by and returned in *plen.
2756 * May return NULL if resources needed to perform the mapping are exhausted.
2757 * Use only for reads OR writes - not for read-modify-write operations.
2758 * Use cpu_register_map_client() to know when retrying the map operation is
2759  * likely to succeed.
2760  */
2761 void *address_space_map(AddressSpace *as,
2762 hwaddr addr,
2763 hwaddr *plen,
2764 bool is_write)
2766 hwaddr len = *plen;
2767 hwaddr done = 0;
2768 hwaddr l, xlat, base;
2769 MemoryRegion *mr, *this_mr;
2770 ram_addr_t raddr;
2772 if (len == 0) {
2773 return NULL;
2776 l = len;
2777 rcu_read_lock();
2778 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2780 if (!memory_access_is_direct(mr, is_write)) {
2781 if (atomic_xchg(&bounce.in_use, true)) {
2782 rcu_read_unlock();
2783 return NULL;
2785 /* Avoid unbounded allocations */
2786 l = MIN(l, TARGET_PAGE_SIZE);
2787 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2788 bounce.addr = addr;
2789 bounce.len = l;
2791 memory_region_ref(mr);
2792 bounce.mr = mr;
2793 if (!is_write) {
2794 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2795 bounce.buffer, l);
2798 rcu_read_unlock();
2799 *plen = l;
2800 return bounce.buffer;
2803 base = xlat;
2804 raddr = memory_region_get_ram_addr(mr);
2806 for (;;) {
2807 len -= l;
2808 addr += l;
2809 done += l;
2810 if (len == 0) {
2811 break;
2814 l = len;
2815 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2816 if (this_mr != mr || xlat != base + done) {
2817 break;
2821 memory_region_ref(mr);
2822 rcu_read_unlock();
2823 *plen = done;
2824 return qemu_ram_ptr_length(raddr + base, plen);
2827 /* Unmaps a memory region previously mapped by address_space_map().
2828 * Will also mark the memory as dirty if is_write == 1. access_len gives
2829  * the amount of memory that was actually read or written by the caller.
2830  */
2831 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2832 int is_write, hwaddr access_len)
2834 if (buffer != bounce.buffer) {
2835 MemoryRegion *mr;
2836 ram_addr_t addr1;
2838 mr = qemu_ram_addr_from_host(buffer, &addr1);
2839 assert(mr != NULL);
2840 if (is_write) {
2841 invalidate_and_set_dirty(mr, addr1, access_len);
2843 if (xen_enabled()) {
2844 xen_invalidate_map_cache_entry(buffer);
2846 memory_region_unref(mr);
2847 return;
2849 if (is_write) {
2850 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2851 bounce.buffer, access_len);
2853 qemu_vfree(bounce.buffer);
2854 bounce.buffer = NULL;
2855 memory_region_unref(bounce.mr);
2856 atomic_mb_set(&bounce.in_use, false);
2857 cpu_notify_map_clients();
2860 void *cpu_physical_memory_map(hwaddr addr,
2861 hwaddr *plen,
2862 int is_write)
2864 return address_space_map(&address_space_memory, addr, plen, is_write);
2867 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2868 int is_write, hwaddr access_len)
2870 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
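/* Usage sketch (illustrative): a zero-copy read that degrades gracefully to
 * the slow path.  A real device model would also handle *plen coming back
 * smaller than requested by looping over the remainder.
 */
static void example_dma_read(hwaddr addr, uint8_t *dest, int size)
{
    hwaddr plen = size;
    void *host = cpu_physical_memory_map(addr, &plen, 0 /* read */);

    if (host) {
        memcpy(dest, host, plen);
        /* access_len == plen: everything we mapped was actually read. */
        cpu_physical_memory_unmap(host, plen, 0, plen);
    } else {
        /* Mapping resources (the bounce buffer) exhausted: copy instead. */
        cpu_physical_memory_rw(addr, dest, size, 0);
    }
}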
2873 /* warning: addr must be aligned */
2874 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2875 MemTxAttrs attrs,
2876 MemTxResult *result,
2877 enum device_endian endian)
2879 uint8_t *ptr;
2880 uint64_t val;
2881 MemoryRegion *mr;
2882 hwaddr l = 4;
2883 hwaddr addr1;
2884 MemTxResult r;
2885 bool release_lock = false;
2887 rcu_read_lock();
2888 mr = address_space_translate(as, addr, &addr1, &l, false);
2889 if (l < 4 || !memory_access_is_direct(mr, false)) {
2890 release_lock |= prepare_mmio_access(mr);
2892 /* I/O case */
2893 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2894 #if defined(TARGET_WORDS_BIGENDIAN)
2895 if (endian == DEVICE_LITTLE_ENDIAN) {
2896 val = bswap32(val);
2898 #else
2899 if (endian == DEVICE_BIG_ENDIAN) {
2900 val = bswap32(val);
2902 #endif
2903 } else {
2904 /* RAM case */
2905 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2906 & TARGET_PAGE_MASK)
2907 + addr1);
2908 switch (endian) {
2909 case DEVICE_LITTLE_ENDIAN:
2910 val = ldl_le_p(ptr);
2911 break;
2912 case DEVICE_BIG_ENDIAN:
2913 val = ldl_be_p(ptr);
2914 break;
2915 default:
2916 val = ldl_p(ptr);
2917 break;
2919 r = MEMTX_OK;
2921 if (result) {
2922 *result = r;
2924 if (release_lock) {
2925 qemu_mutex_unlock_iothread();
2927 rcu_read_unlock();
2928 return val;
2931 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2932 MemTxAttrs attrs, MemTxResult *result)
2934 return address_space_ldl_internal(as, addr, attrs, result,
2935 DEVICE_NATIVE_ENDIAN);
2938 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2939 MemTxAttrs attrs, MemTxResult *result)
2941 return address_space_ldl_internal(as, addr, attrs, result,
2942 DEVICE_LITTLE_ENDIAN);
2945 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2946 MemTxAttrs attrs, MemTxResult *result)
2948 return address_space_ldl_internal(as, addr, attrs, result,
2949 DEVICE_BIG_ENDIAN);
2952 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2954 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2957 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2959 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2962 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2964 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2967 /* warning: addr must be aligned */
2968 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2969 MemTxAttrs attrs,
2970 MemTxResult *result,
2971 enum device_endian endian)
2973 uint8_t *ptr;
2974 uint64_t val;
2975 MemoryRegion *mr;
2976 hwaddr l = 8;
2977 hwaddr addr1;
2978 MemTxResult r;
2979 bool release_lock = false;
2981 rcu_read_lock();
2982 mr = address_space_translate(as, addr, &addr1, &l,
2983 false);
2984 if (l < 8 || !memory_access_is_direct(mr, false)) {
2985 release_lock |= prepare_mmio_access(mr);
2987 /* I/O case */
2988 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2989 #if defined(TARGET_WORDS_BIGENDIAN)
2990 if (endian == DEVICE_LITTLE_ENDIAN) {
2991 val = bswap64(val);
2993 #else
2994 if (endian == DEVICE_BIG_ENDIAN) {
2995 val = bswap64(val);
2997 #endif
2998 } else {
2999 /* RAM case */
3000 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3001 & TARGET_PAGE_MASK)
3002 + addr1);
3003 switch (endian) {
3004 case DEVICE_LITTLE_ENDIAN:
3005 val = ldq_le_p(ptr);
3006 break;
3007 case DEVICE_BIG_ENDIAN:
3008 val = ldq_be_p(ptr);
3009 break;
3010 default:
3011 val = ldq_p(ptr);
3012 break;
3014 r = MEMTX_OK;
3016 if (result) {
3017 *result = r;
3019 if (release_lock) {
3020 qemu_mutex_unlock_iothread();
3022 rcu_read_unlock();
3023 return val;
3026 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3027 MemTxAttrs attrs, MemTxResult *result)
3029 return address_space_ldq_internal(as, addr, attrs, result,
3030 DEVICE_NATIVE_ENDIAN);
3033 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3034 MemTxAttrs attrs, MemTxResult *result)
3036 return address_space_ldq_internal(as, addr, attrs, result,
3037 DEVICE_LITTLE_ENDIAN);
3040 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3041 MemTxAttrs attrs, MemTxResult *result)
3043 return address_space_ldq_internal(as, addr, attrs, result,
3044 DEVICE_BIG_ENDIAN);
3047 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3049 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3052 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3054 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3057 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3059 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3062 /* XXX: optimize */
3063 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3064 MemTxAttrs attrs, MemTxResult *result)
3066 uint8_t val;
3067 MemTxResult r;
3069 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3070 if (result) {
3071 *result = r;
3073 return val;
3076 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3078 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3081 /* warning: addr must be aligned */
3082 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3083 hwaddr addr,
3084 MemTxAttrs attrs,
3085 MemTxResult *result,
3086 enum device_endian endian)
3088 uint8_t *ptr;
3089 uint64_t val;
3090 MemoryRegion *mr;
3091 hwaddr l = 2;
3092 hwaddr addr1;
3093 MemTxResult r;
3094 bool release_lock = false;
3096 rcu_read_lock();
3097 mr = address_space_translate(as, addr, &addr1, &l,
3098 false);
3099 if (l < 2 || !memory_access_is_direct(mr, false)) {
3100 release_lock |= prepare_mmio_access(mr);
3102 /* I/O case */
3103 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3104 #if defined(TARGET_WORDS_BIGENDIAN)
3105 if (endian == DEVICE_LITTLE_ENDIAN) {
3106 val = bswap16(val);
3108 #else
3109 if (endian == DEVICE_BIG_ENDIAN) {
3110 val = bswap16(val);
3112 #endif
3113 } else {
3114 /* RAM case */
3115 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3116 & TARGET_PAGE_MASK)
3117 + addr1);
3118 switch (endian) {
3119 case DEVICE_LITTLE_ENDIAN:
3120 val = lduw_le_p(ptr);
3121 break;
3122 case DEVICE_BIG_ENDIAN:
3123 val = lduw_be_p(ptr);
3124 break;
3125 default:
3126 val = lduw_p(ptr);
3127 break;
3129 r = MEMTX_OK;
3131 if (result) {
3132 *result = r;
3134 if (release_lock) {
3135 qemu_mutex_unlock_iothread();
3137 rcu_read_unlock();
3138 return val;
3141 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3142 MemTxAttrs attrs, MemTxResult *result)
3144 return address_space_lduw_internal(as, addr, attrs, result,
3145 DEVICE_NATIVE_ENDIAN);
3148 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3149 MemTxAttrs attrs, MemTxResult *result)
3151 return address_space_lduw_internal(as, addr, attrs, result,
3152 DEVICE_LITTLE_ENDIAN);
3155 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3156 MemTxAttrs attrs, MemTxResult *result)
3158 return address_space_lduw_internal(as, addr, attrs, result,
3159 DEVICE_BIG_ENDIAN);
3162 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3164 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3167 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3169 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3172 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3174 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3177 /* warning: addr must be aligned. The ram page is not masked as dirty
3178 and the code inside is not invalidated. It is useful if the dirty
3179 bits are used to track modified PTEs */
3180 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3181 MemTxAttrs attrs, MemTxResult *result)
3183 uint8_t *ptr;
3184 MemoryRegion *mr;
3185 hwaddr l = 4;
3186 hwaddr addr1;
3187 MemTxResult r;
3188 uint8_t dirty_log_mask;
3189 bool release_lock = false;
3191 rcu_read_lock();
3192 mr = address_space_translate(as, addr, &addr1, &l,
3193 true);
3194 if (l < 4 || !memory_access_is_direct(mr, true)) {
3195 release_lock |= prepare_mmio_access(mr);
3197 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3198 } else {
3199 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3200 ptr = qemu_get_ram_ptr(addr1);
3201 stl_p(ptr, val);
3203 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3204 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3205 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3206 r = MEMTX_OK;
3208 if (result) {
3209 *result = r;
3211 if (release_lock) {
3212 qemu_mutex_unlock_iothread();
3214 rcu_read_unlock();
3217 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3219 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3222 /* warning: addr must be aligned */
3223 static inline void address_space_stl_internal(AddressSpace *as,
3224 hwaddr addr, uint32_t val,
3225 MemTxAttrs attrs,
3226 MemTxResult *result,
3227 enum device_endian endian)
3229 uint8_t *ptr;
3230 MemoryRegion *mr;
3231 hwaddr l = 4;
3232 hwaddr addr1;
3233 MemTxResult r;
3234 bool release_lock = false;
3236 rcu_read_lock();
3237 mr = address_space_translate(as, addr, &addr1, &l,
3238 true);
3239 if (l < 4 || !memory_access_is_direct(mr, true)) {
3240 release_lock |= prepare_mmio_access(mr);
3242 #if defined(TARGET_WORDS_BIGENDIAN)
3243 if (endian == DEVICE_LITTLE_ENDIAN) {
3244 val = bswap32(val);
3246 #else
3247 if (endian == DEVICE_BIG_ENDIAN) {
3248 val = bswap32(val);
3250 #endif
3251 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3252 } else {
3253 /* RAM case */
3254 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3255 ptr = qemu_get_ram_ptr(addr1);
3256 switch (endian) {
3257 case DEVICE_LITTLE_ENDIAN:
3258 stl_le_p(ptr, val);
3259 break;
3260 case DEVICE_BIG_ENDIAN:
3261 stl_be_p(ptr, val);
3262 break;
3263 default:
3264 stl_p(ptr, val);
3265 break;
3267 invalidate_and_set_dirty(mr, addr1, 4);
3268 r = MEMTX_OK;
3270 if (result) {
3271 *result = r;
3273 if (release_lock) {
3274 qemu_mutex_unlock_iothread();
3276 rcu_read_unlock();
3279 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3280 MemTxAttrs attrs, MemTxResult *result)
3282 address_space_stl_internal(as, addr, val, attrs, result,
3283 DEVICE_NATIVE_ENDIAN);
3286 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3287 MemTxAttrs attrs, MemTxResult *result)
3289 address_space_stl_internal(as, addr, val, attrs, result,
3290 DEVICE_LITTLE_ENDIAN);
3293 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3294 MemTxAttrs attrs, MemTxResult *result)
3296 address_space_stl_internal(as, addr, val, attrs, result,
3297 DEVICE_BIG_ENDIAN);
3300 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3302 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3305 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3307 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3310 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3312 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3315 /* XXX: optimize */
3316 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3317 MemTxAttrs attrs, MemTxResult *result)
3319 uint8_t v = val;
3320 MemTxResult r;
3322 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3323 if (result) {
3324 *result = r;
3328 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3330 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3333 /* warning: addr must be aligned */
3334 static inline void address_space_stw_internal(AddressSpace *as,
3335 hwaddr addr, uint32_t val,
3336 MemTxAttrs attrs,
3337 MemTxResult *result,
3338 enum device_endian endian)
3340 uint8_t *ptr;
3341 MemoryRegion *mr;
3342 hwaddr l = 2;
3343 hwaddr addr1;
3344 MemTxResult r;
3345 bool release_lock = false;
3347 rcu_read_lock();
3348 mr = address_space_translate(as, addr, &addr1, &l, true);
3349 if (l < 2 || !memory_access_is_direct(mr, true)) {
3350 release_lock |= prepare_mmio_access(mr);
3352 #if defined(TARGET_WORDS_BIGENDIAN)
3353 if (endian == DEVICE_LITTLE_ENDIAN) {
3354 val = bswap16(val);
3356 #else
3357 if (endian == DEVICE_BIG_ENDIAN) {
3358 val = bswap16(val);
3360 #endif
3361 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3362 } else {
3363 /* RAM case */
3364 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3365 ptr = qemu_get_ram_ptr(addr1);
3366 switch (endian) {
3367 case DEVICE_LITTLE_ENDIAN:
3368 stw_le_p(ptr, val);
3369 break;
3370 case DEVICE_BIG_ENDIAN:
3371 stw_be_p(ptr, val);
3372 break;
3373 default:
3374 stw_p(ptr, val);
3375 break;
3377 invalidate_and_set_dirty(mr, addr1, 2);
3378 r = MEMTX_OK;
3380 if (result) {
3381 *result = r;
3383 if (release_lock) {
3384 qemu_mutex_unlock_iothread();
3386 rcu_read_unlock();
3389 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3390 MemTxAttrs attrs, MemTxResult *result)
3392 address_space_stw_internal(as, addr, val, attrs, result,
3393 DEVICE_NATIVE_ENDIAN);
3396 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3397 MemTxAttrs attrs, MemTxResult *result)
3399 address_space_stw_internal(as, addr, val, attrs, result,
3400 DEVICE_LITTLE_ENDIAN);
3403 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3404 MemTxAttrs attrs, MemTxResult *result)
3406 address_space_stw_internal(as, addr, val, attrs, result,
3407 DEVICE_BIG_ENDIAN);
3410 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3412 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3415 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3417 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3420 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3422 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3425 /* XXX: optimize */
3426 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3427 MemTxAttrs attrs, MemTxResult *result)
3429 MemTxResult r;
3430 val = tswap64(val);
3431 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3432 if (result) {
3433 *result = r;
3437 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3438 MemTxAttrs attrs, MemTxResult *result)
3440 MemTxResult r;
3441 val = cpu_to_le64(val);
3442 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3443 if (result) {
3444 *result = r;
3447 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3448 MemTxAttrs attrs, MemTxResult *result)
3450 MemTxResult r;
3451 val = cpu_to_be64(val);
3452 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3453 if (result) {
3454 *result = r;
3458 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3460 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3463 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3465 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3468 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3470 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
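/* Usage sketch (illustrative): patching a 32-bit field of an in-memory,
 * little-endian descriptor with the fixed-size helpers above.  0x2000 is an
 * arbitrary, suitably aligned example address.
 */
static void example_patch_descriptor(AddressSpace *as)
{
    uint32_t flags = ldl_le_phys(as, 0x2000);   /* 32-bit LE load */

    flags |= 1;                                 /* set an example bit */
    stl_le_phys(as, 0x2000, flags);             /* 32-bit LE store */
}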
3473 /* virtual memory access for debug (includes writing to ROM) */
3474 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3475 uint8_t *buf, int len, int is_write)
3477 int l;
3478 hwaddr phys_addr;
3479 target_ulong page;
3481 while (len > 0) {
3482 page = addr & TARGET_PAGE_MASK;
3483 phys_addr = cpu_get_phys_page_debug(cpu, page);
3484 /* if no physical page mapped, return an error */
3485 if (phys_addr == -1)
3486 return -1;
3487 l = (page + TARGET_PAGE_SIZE) - addr;
3488 if (l > len)
3489 l = len;
3490 phys_addr += (addr & ~TARGET_PAGE_MASK);
3491 if (is_write) {
3492 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3493 } else {
3494 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3495 buf, l, 0);
3497 len -= l;
3498 buf += l;
3499 addr += l;
3501 return 0;
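/* Usage sketch (illustrative): this is what the gdb stub and the monitor use
 * to read guest *virtual* memory, e.g.
 *
 *     uint32_t insn;
 *     if (cpu_memory_rw_debug(cpu, pc, (uint8_t *)&insn, sizeof(insn), 0) == 0) {
 *         ...  insn now holds the word the guest sees at 'pc'  ...
 *     }
 *
 * where 'pc' stands for a target-specific guest virtual address.
 */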
3503 #endif
3505 /*
3506  * A helper function for the _utterly broken_ virtio device model to find out if
3507  * it's running on a big endian machine. Don't do this at home kids!
3508  */
3509 bool target_words_bigendian(void);
3510 bool target_words_bigendian(void)
3512 #if defined(TARGET_WORDS_BIGENDIAN)
3513 return true;
3514 #else
3515 return false;
3516 #endif
3519 #ifndef CONFIG_USER_ONLY
3520 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3522     MemoryRegion *mr;
3523 hwaddr l = 1;
3524 bool res;
3526 rcu_read_lock();
3527 mr = address_space_translate(&address_space_memory,
3528 phys_addr, &phys_addr, &l, false);
3530 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3531 rcu_read_unlock();
3532 return res;
3535 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3537 RAMBlock *block;
3538 int ret = 0;
3540 rcu_read_lock();
3541 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3542 ret = func(block->idstr, block->host, block->offset,
3543 block->used_length, opaque);
3544 if (ret) {
3545 break;
3548 rcu_read_unlock();
3549 return ret;
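/* Usage sketch (illustrative): a RAMBlockIterFunc-style callback matching the
 * func() invocation above, e.g. qemu_ram_foreach_block(example_dump_block, NULL);
 */
static int example_dump_block(const char *idstr, void *host_addr,
                              ram_addr_t offset, ram_addr_t length,
                              void *opaque)
{
    fprintf(stderr, "block %s: host %p offset 0x%" PRIx64 " length 0x%" PRIx64 "\n",
            idstr, host_addr, (uint64_t)offset, (uint64_t)length);
    return 0;   /* returning non-zero stops the iteration early */
}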
3551 #endif