/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
58 #include "qemu/range.h"
59 #ifndef _WIN32
60 #include "qemu/mmap-alloc.h"
61 #endif
63 //#define DEBUG_SUBPAGE
65 #if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
71 static MemoryRegion *system_memory;
72 static MemoryRegion *system_io;
74 AddressSpace address_space_io;
75 AddressSpace address_space_memory;
77 MemoryRegion io_mem_rom, io_mem_notdirty;
78 static MemoryRegion io_mem_unassigned;
80 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
81 #define RAM_PREALLOC (1 << 0)
83 /* RAM is mmap-ed with MAP_SHARED */
84 #define RAM_SHARED (1 << 1)
/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

/* RAM is backed by an mmapped file.
 */
#define RAM_FILE (1 << 3)
94 #endif
96 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
97 /* current CPU in the current thread. It is only valid inside
98 cpu_exec() */
99 __thread CPUState *current_cpu;
100 /* 0 = Do not count executed instructions.
101 1 = Precise instruction counting.
102 2 = Adaptive rate instruction counting. */
103 int use_icount;
105 #if !defined(CONFIG_USER_ONLY)
107 typedef struct PhysPageEntry PhysPageEntry;
struct PhysPageEntry {
    /* How many bits to skip to the next level (in units of L2_SIZE).
     * 0 for a leaf.
     */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
116 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
118 /* Size of the L2 (and L3, etc) page tables. */
119 #define ADDR_SPACE_BITS 64
121 #define P_L2_BITS 9
122 #define P_L2_SIZE (1 << P_L2_BITS)
124 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
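/* Worked example (illustrative; assumes a 4 KiB target page, i.e.
 * TARGET_PAGE_BITS == 12): the map must cover 64 - 12 = 52 address bits,
 * so P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6, i.e. six levels of
 * 512-entry tables (6 * 9 = 54 bits >= 52 bits).
 */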
126 typedef PhysPageEntry Node[P_L2_SIZE];
128 typedef struct PhysPageMap {
129 struct rcu_head rcu;
131 unsigned sections_nb;
132 unsigned sections_nb_alloc;
133 unsigned nodes_nb;
134 unsigned nodes_nb_alloc;
135 Node *nodes;
136 MemoryRegionSection *sections;
137 } PhysPageMap;
struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
151 typedef struct subpage_t {
152 MemoryRegion iomem;
153 AddressSpace *as;
154 hwaddr base;
155 uint16_t sub_section[TARGET_PAGE_SIZE];
156 } subpage_t;
158 #define PHYS_SECTION_UNASSIGNED 0
159 #define PHYS_SECTION_NOTDIRTY 1
160 #define PHYS_SECTION_ROM 2
161 #define PHYS_SECTION_WATCH 3
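/* These section numbers are fixed by construction: mem_begin() registers the
 * corresponding dummy sections in exactly this order and asserts that
 * dummy_section() hands back matching indices (see below).
 */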
163 static void io_mem_init(void);
164 static void memory_map_init(void);
165 static void tcg_commit(MemoryListener *listener);
static MemoryRegion io_mem_watch;

/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};

#endif
185 #if !defined(CONFIG_USER_ONLY)
187 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
189 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
190 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
191 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
192 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
196 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
198 unsigned i;
199 uint32_t ret;
200 PhysPageEntry e;
201 PhysPageEntry *p;
203 ret = map->nodes_nb++;
204 p = map->nodes[ret];
205 assert(ret != PHYS_MAP_NODE_NIL);
206 assert(ret != map->nodes_nb_alloc);
208 e.skip = leaf ? 0 : 1;
209 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
210 for (i = 0; i < P_L2_SIZE; ++i) {
211 memcpy(&p[i], &e, sizeof(e));
213 return ret;
216 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
217 hwaddr *index, hwaddr *nb, uint16_t leaf,
218 int level)
220 PhysPageEntry *p;
221 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
223 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
224 lp->ptr = phys_map_node_alloc(map, level == 0);
226 p = map->nodes[lp->ptr];
227 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
229 while (*nb && lp < &p[P_L2_SIZE]) {
230 if ((*index & (step - 1)) == 0 && *nb >= step) {
231 lp->skip = 0;
232 lp->ptr = leaf;
233 *index += step;
234 *nb -= step;
235 } else {
236 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
238 ++lp;
242 static void phys_page_set(AddressSpaceDispatch *d,
243 hwaddr index, hwaddr nb,
244 uint16_t leaf)
246 /* Wildly overreserve - it doesn't matter much. */
247 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
249 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
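/* Worked example (illustrative; assumes 4 KiB target pages): registering a
 * 1 MiB RAM section starting at guest physical address 0x100000, stored as
 * phys_sections entry 5, amounts to
 *
 *     phys_page_set(d, 0x100000 >> TARGET_PAGE_BITS, 0x100, 5);
 *
 * i.e. 256 leaf entries that all point at section 5.
 */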
/* Compact a non-leaf page entry.  Simply detect that the entry has a single
 * child, and update our entry so we can skip it and go directly to the
 * destination.
 */
255 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
257 unsigned valid_ptr = P_L2_SIZE;
258 int valid = 0;
259 PhysPageEntry *p;
260 int i;
262 if (lp->ptr == PHYS_MAP_NODE_NIL) {
263 return;
266 p = nodes[lp->ptr];
267 for (i = 0; i < P_L2_SIZE; i++) {
268 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
269 continue;
272 valid_ptr = i;
273 valid++;
274 if (p[i].skip) {
275 phys_page_compact(&p[i], nodes, compacted);
279 /* We can only compress if there's only one child. */
280 if (valid != 1) {
281 return;
284 assert(valid_ptr < P_L2_SIZE);
286 /* Don't compress if it won't fit in the # of bits we have. */
287 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
288 return;
291 lp->ptr = p[valid_ptr].ptr;
292 if (!p[valid_ptr].skip) {
293 /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
299 lp->skip = 0;
300 } else {
301 lp->skip += p[valid_ptr].skip;
305 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
307 DECLARE_BITMAP(compacted, nodes_nb);
309 if (d->phys_map.skip) {
310 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
314 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
315 Node *nodes, MemoryRegionSection *sections)
317 PhysPageEntry *p;
318 hwaddr index = addr >> TARGET_PAGE_BITS;
319 int i;
321 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
322 if (lp.ptr == PHYS_MAP_NODE_NIL) {
323 return &sections[PHYS_SECTION_UNASSIGNED];
325 p = nodes[lp.ptr];
326 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
329 if (sections[lp.ptr].size.hi ||
330 range_covers_byte(sections[lp.ptr].offset_within_address_space,
331 sections[lp.ptr].size.lo, addr)) {
332 return &sections[lp.ptr];
333 } else {
334 return &sections[PHYS_SECTION_UNASSIGNED];
338 bool memory_region_is_unassigned(MemoryRegion *mr)
340 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
341 && mr != &io_mem_watch;
344 /* Called from RCU critical section */
345 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
346 hwaddr addr,
347 bool resolve_subpage)
349 MemoryRegionSection *section;
350 subpage_t *subpage;
352 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
353 if (resolve_subpage && section->mr->subpage) {
354 subpage = container_of(section->mr, subpage_t, iomem);
355 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
357 return section;
360 /* Called from RCU critical section */
361 static MemoryRegionSection *
362 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
363 hwaddr *plen, bool resolve_subpage)
365 MemoryRegionSection *section;
366 MemoryRegion *mr;
367 Int128 diff;
369 section = address_space_lookup_region(d, addr, resolve_subpage);
370 /* Compute offset within MemoryRegionSection */
371 addr -= section->offset_within_address_space;
373 /* Compute offset within MemoryRegion */
374 *xlat = addr + section->offset_within_region;
376 mr = section->mr;
    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
389 if (memory_region_is_ram(mr)) {
390 diff = int128_sub(section->size, int128_make64(addr));
391 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
393 return section;
396 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
398 if (memory_region_is_ram(mr)) {
399 return !(is_write && mr->readonly);
401 if (memory_region_is_romd(mr)) {
402 return !is_write;
405 return false;
408 /* Called from RCU critical section */
409 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
410 hwaddr *xlat, hwaddr *plen,
411 bool is_write)
413 IOMMUTLBEntry iotlb;
414 MemoryRegionSection *section;
415 MemoryRegion *mr;
417 for (;;) {
418 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
419 section = address_space_translate_internal(d, addr, &addr, plen, true);
420 mr = section->mr;
422 if (!mr->iommu_ops) {
423 break;
426 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
427 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
428 | (addr & iotlb.addr_mask));
429 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
430 if (!(iotlb.perm & (1 << is_write))) {
431 mr = &io_mem_unassigned;
432 break;
435 as = iotlb.target_as;
438 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
439 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
440 *plen = MIN(page, *plen);
443 *xlat = addr;
444 return mr;
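/* Typical caller pattern (illustrative sketch, error handling omitted): the
 * translation must happen inside an RCU critical section, and direct RAM
 * access is only legal when memory_access_is_direct() says so.  This mirrors
 * the RAM fast path of address_space_rw():
 *
 *     hwaddr xlat, l = len;
 *     rcu_read_lock();
 *     mr = address_space_translate(as, addr, &xlat, &l, is_write);
 *     if (memory_access_is_direct(mr, is_write)) {
 *         ptr = qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + xlat);
 *         ... copy at most l bytes to/from ptr ...
 *     }
 *     rcu_read_unlock();
 */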
447 /* Called from RCU critical section */
448 MemoryRegionSection *
449 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
450 hwaddr *xlat, hwaddr *plen)
452 MemoryRegionSection *section;
453 section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
454 addr, xlat, plen, false);
456 assert(!section->mr->iommu_ops);
457 return section;
459 #endif
461 #if !defined(CONFIG_USER_ONLY)
463 static int cpu_common_post_load(void *opaque, int version_id)
465 CPUState *cpu = opaque;
467 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
468 version_id is increased. */
469 cpu->interrupt_request &= ~0x01;
470 tlb_flush(cpu, 1);
472 return 0;
475 static int cpu_common_pre_load(void *opaque)
477 CPUState *cpu = opaque;
479 cpu->exception_index = -1;
481 return 0;
484 static bool cpu_common_exception_index_needed(void *opaque)
486 CPUState *cpu = opaque;
488 return tcg_enabled() && cpu->exception_index != -1;
491 static const VMStateDescription vmstate_cpu_common_exception_index = {
492 .name = "cpu_common/exception_index",
493 .version_id = 1,
494 .minimum_version_id = 1,
495 .needed = cpu_common_exception_index_needed,
496 .fields = (VMStateField[]) {
497 VMSTATE_INT32(exception_index, CPUState),
498 VMSTATE_END_OF_LIST()
502 static bool cpu_common_crash_occurred_needed(void *opaque)
504 CPUState *cpu = opaque;
506 return cpu->crash_occurred;
509 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
510 .name = "cpu_common/crash_occurred",
511 .version_id = 1,
512 .minimum_version_id = 1,
513 .needed = cpu_common_crash_occurred_needed,
514 .fields = (VMStateField[]) {
515 VMSTATE_BOOL(crash_occurred, CPUState),
516 VMSTATE_END_OF_LIST()
520 const VMStateDescription vmstate_cpu_common = {
521 .name = "cpu_common",
522 .version_id = 1,
523 .minimum_version_id = 1,
524 .pre_load = cpu_common_pre_load,
525 .post_load = cpu_common_post_load,
526 .fields = (VMStateField[]) {
527 VMSTATE_UINT32(halted, CPUState),
528 VMSTATE_UINT32(interrupt_request, CPUState),
529 VMSTATE_END_OF_LIST()
531 .subsections = (const VMStateDescription*[]) {
532 &vmstate_cpu_common_exception_index,
533 &vmstate_cpu_common_crash_occurred,
534 NULL
538 #endif
540 CPUState *qemu_get_cpu(int index)
542 CPUState *cpu;
544 CPU_FOREACH(cpu) {
545 if (cpu->cpu_index == index) {
546 return cpu;
550 return NULL;
553 #if !defined(CONFIG_USER_ONLY)
554 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
556 /* We only support one address space per cpu at the moment. */
557 assert(cpu->as == as);
559 if (cpu->cpu_ases) {
560 /* We've already registered the listener for our only AS */
561 return;
564 cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
565 cpu->cpu_ases[0].cpu = cpu;
566 cpu->cpu_ases[0].as = as;
567 cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
568 memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
570 #endif
572 #ifndef CONFIG_USER_ONLY
573 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
575 static int cpu_get_free_index(Error **errp)
577 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
579 if (cpu >= MAX_CPUMASK_BITS) {
580 error_setg(errp, "Trying to use more CPUs than max of %d",
581 MAX_CPUMASK_BITS);
582 return -1;
585 bitmap_set(cpu_index_map, cpu, 1);
586 return cpu;
589 void cpu_exec_exit(CPUState *cpu)
591 if (cpu->cpu_index == -1) {
592 /* cpu_index was never allocated by this @cpu or was already freed. */
593 return;
596 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
597 cpu->cpu_index = -1;
599 #else
601 static int cpu_get_free_index(Error **errp)
603 CPUState *some_cpu;
604 int cpu_index = 0;
606 CPU_FOREACH(some_cpu) {
607 cpu_index++;
609 return cpu_index;
612 void cpu_exec_exit(CPUState *cpu)
615 #endif
617 void cpu_exec_init(CPUState *cpu, Error **errp)
619 CPUClass *cc = CPU_GET_CLASS(cpu);
620 int cpu_index;
621 Error *local_err = NULL;
623 #ifndef CONFIG_USER_ONLY
624 cpu->as = &address_space_memory;
625 cpu->thread_id = qemu_get_thread_id();
626 #endif
628 #if defined(CONFIG_USER_ONLY)
629 cpu_list_lock();
630 #endif
631 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
632 if (local_err) {
633 error_propagate(errp, local_err);
634 #if defined(CONFIG_USER_ONLY)
635 cpu_list_unlock();
636 #endif
637 return;
639 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
640 #if defined(CONFIG_USER_ONLY)
641 cpu_list_unlock();
642 #endif
643 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
644 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
646 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
647 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
648 cpu_save, cpu_load, cpu->env_ptr);
649 assert(cc->vmsd == NULL);
650 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
651 #endif
652 if (cc->vmsd != NULL) {
653 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
657 #if defined(CONFIG_USER_ONLY)
658 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
660 tb_invalidate_phys_page_range(pc, pc + 1, 0);
662 #else
663 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
665 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
666 if (phys != -1) {
667 tb_invalidate_phys_addr(cpu->as,
668 phys | (pc & ~TARGET_PAGE_MASK));
671 #endif
673 #if defined(CONFIG_USER_ONLY)
674 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
679 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
680 int flags)
682 return -ENOSYS;
685 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
689 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
690 int flags, CPUWatchpoint **watchpoint)
692 return -ENOSYS;
694 #else
695 /* Add a watchpoint. */
696 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
697 int flags, CPUWatchpoint **watchpoint)
699 CPUWatchpoint *wp;
701 /* forbid ranges which are empty or run off the end of the address space */
702 if (len == 0 || (addr + len - 1) < addr) {
703 error_report("tried to set invalid watchpoint at %"
704 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
705 return -EINVAL;
707 wp = g_malloc(sizeof(*wp));
709 wp->vaddr = addr;
710 wp->len = len;
711 wp->flags = flags;
713 /* keep all GDB-injected watchpoints in front */
714 if (flags & BP_GDB) {
715 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
716 } else {
717 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
720 tlb_flush_page(cpu, addr);
722 if (watchpoint)
723 *watchpoint = wp;
724 return 0;
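/* Example (illustrative): a debugger front end installing a 4-byte write
 * watchpoint would typically do
 *
 *     CPUWatchpoint *wp;
 *     if (cpu_watchpoint_insert(cpu, addr, 4, BP_GDB | BP_MEM_WRITE, &wp) < 0) {
 *         ... report the error ...
 *     }
 *
 * and later drop it again with cpu_watchpoint_remove_by_ref(cpu, wp).
 */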
727 /* Remove a specific watchpoint. */
728 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
729 int flags)
731 CPUWatchpoint *wp;
733 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
734 if (addr == wp->vaddr && len == wp->len
735 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
736 cpu_watchpoint_remove_by_ref(cpu, wp);
737 return 0;
740 return -ENOENT;
743 /* Remove a specific watchpoint by reference. */
744 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
746 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
748 tlb_flush_page(cpu, watchpoint->vaddr);
750 g_free(watchpoint);
753 /* Remove all matching watchpoints. */
754 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
756 CPUWatchpoint *wp, *next;
758 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
759 if (wp->flags & mask) {
760 cpu_watchpoint_remove_by_ref(cpu, wp);
/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
770 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
771 vaddr addr,
772 vaddr len)
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
779 vaddr wpend = wp->vaddr + wp->len - 1;
780 vaddr addrend = addr + len - 1;
782 return !(addr > wpend || wp->vaddr > addrend);
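/* Worked example (illustrative): a watchpoint at vaddr 0xfffffffffffffffc
 * with len 4 gives wpend == ~(vaddr)0.  A 2-byte access at
 * 0xfffffffffffffffe gives addrend == ~(vaddr)0 as well, so neither
 * "addr > wpend" nor "wp->vaddr > addrend" holds and the overlap is
 * detected, whereas a naive "addr < wp->vaddr + wp->len" test would have
 * wrapped to zero and missed it.
 */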
785 #endif
787 /* Add a breakpoint. */
788 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
789 CPUBreakpoint **breakpoint)
791 CPUBreakpoint *bp;
793 bp = g_malloc(sizeof(*bp));
795 bp->pc = pc;
796 bp->flags = flags;
798 /* keep all GDB-injected breakpoints in front */
799 if (flags & BP_GDB) {
800 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
801 } else {
802 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
805 breakpoint_invalidate(cpu, pc);
807 if (breakpoint) {
808 *breakpoint = bp;
810 return 0;
813 /* Remove a specific breakpoint. */
814 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
816 CPUBreakpoint *bp;
818 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
819 if (bp->pc == pc && bp->flags == flags) {
820 cpu_breakpoint_remove_by_ref(cpu, bp);
821 return 0;
824 return -ENOENT;
827 /* Remove a specific breakpoint by reference. */
828 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
830 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
832 breakpoint_invalidate(cpu, breakpoint->pc);
834 g_free(breakpoint);
837 /* Remove all matching breakpoints. */
838 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
840 CPUBreakpoint *bp, *next;
842 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
843 if (bp->flags & mask) {
844 cpu_breakpoint_remove_by_ref(cpu, bp);
849 /* enable or disable single step mode. EXCP_DEBUG is returned by the
850 CPU loop after each instruction */
851 void cpu_single_step(CPUState *cpu, int enabled)
853 if (cpu->singlestep_enabled != enabled) {
854 cpu->singlestep_enabled = enabled;
855 if (kvm_enabled()) {
856 kvm_update_guest_debug(cpu, 0);
857 } else {
858 /* must flush all the translated code to avoid inconsistencies */
859 /* XXX: only flush what is necessary */
860 tb_flush(cpu);
865 void cpu_abort(CPUState *cpu, const char *fmt, ...)
867 va_list ap;
868 va_list ap2;
870 va_start(ap, fmt);
871 va_copy(ap2, ap);
872 fprintf(stderr, "qemu: fatal: ");
873 vfprintf(stderr, fmt, ap);
874 fprintf(stderr, "\n");
875 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
876 if (qemu_log_enabled()) {
877 qemu_log("qemu: fatal: ");
878 qemu_log_vprintf(fmt, ap2);
879 qemu_log("\n");
880 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
881 qemu_log_flush();
882 qemu_log_close();
884 va_end(ap2);
885 va_end(ap);
886 replay_finish();
887 #if defined(CONFIG_USER_ONLY)
889 struct sigaction act;
890 sigfillset(&act.sa_mask);
891 act.sa_handler = SIG_DFL;
892 sigaction(SIGABRT, &act, NULL);
894 #endif
895 abort();
898 #if !defined(CONFIG_USER_ONLY)
899 /* Called from RCU critical section */
900 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
902 RAMBlock *block;
904 block = atomic_rcu_read(&ram_list.mru_block);
905 if (block && addr - block->offset < block->max_length) {
906 return block;
908 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
909 if (addr - block->offset < block->max_length) {
910 goto found;
914 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
915 abort();
917 found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
934 ram_list.mru_block = block;
935 return block;
938 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
940 CPUState *cpu;
941 ram_addr_t start1;
942 RAMBlock *block;
943 ram_addr_t end;
945 end = TARGET_PAGE_ALIGN(start + length);
946 start &= TARGET_PAGE_MASK;
948 rcu_read_lock();
949 block = qemu_get_ram_block(start);
950 assert(block == qemu_get_ram_block(end - 1));
951 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
952 CPU_FOREACH(cpu) {
953 tlb_reset_dirty(cpu, start1, length);
955 rcu_read_unlock();
958 /* Note: start and end must be within the same ram block. */
959 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
960 ram_addr_t length,
961 unsigned client)
963 unsigned long end, page;
964 bool dirty;
966 if (length == 0) {
967 return false;
970 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
971 page = start >> TARGET_PAGE_BITS;
972 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
973 page, end - page);
975 if (dirty && tcg_enabled()) {
976 tlb_reset_dirty_range_all(start, length);
979 return dirty;
982 /* Called from RCU critical section */
983 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
984 MemoryRegionSection *section,
985 target_ulong vaddr,
986 hwaddr paddr, hwaddr xlat,
987 int prot,
988 target_ulong *address)
990 hwaddr iotlb;
991 CPUWatchpoint *wp;
993 if (memory_region_is_ram(section->mr)) {
994 /* Normal RAM. */
995 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
996 + xlat;
997 if (!section->readonly) {
998 iotlb |= PHYS_SECTION_NOTDIRTY;
999 } else {
1000 iotlb |= PHYS_SECTION_ROM;
1002 } else {
1003 AddressSpaceDispatch *d;
1005 d = atomic_rcu_read(&section->address_space->dispatch);
1006 iotlb = section - d->map.sections;
1007 iotlb += xlat;
1010 /* Make accesses to pages with watchpoints go via the
1011 watchpoint trap routines. */
1012 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1013 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1014 /* Avoid trapping reads of pages with a write breakpoint. */
1015 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1016 iotlb = PHYS_SECTION_WATCH + paddr;
1017 *address |= TLB_MMIO;
1018 break;
1023 return iotlb;
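/* The returned iotlb therefore combines a page-aligned value with a physical
 * section number in the sub-page bits: PHYS_SECTION_NOTDIRTY or
 * PHYS_SECTION_ROM for RAM whose writes must be intercepted,
 * PHYS_SECTION_WATCH for watched pages, or the index of the
 * MemoryRegionSection itself for MMIO.  This is why phys_section_add()
 * asserts that section numbers stay below TARGET_PAGE_SIZE and why
 * iotlb_to_region() masks the index with ~TARGET_PAGE_MASK.
 */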
1025 #endif /* defined(CONFIG_USER_ONLY) */
1027 #if !defined(CONFIG_USER_ONLY)
1029 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1030 uint16_t section);
1031 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1033 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1034 qemu_anon_ram_alloc;
/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
1041 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1043 phys_mem_alloc = alloc;
1046 static uint16_t phys_section_add(PhysPageMap *map,
1047 MemoryRegionSection *section)
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
1053 assert(map->sections_nb < TARGET_PAGE_SIZE);
1055 if (map->sections_nb == map->sections_nb_alloc) {
1056 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1057 map->sections = g_renew(MemoryRegionSection, map->sections,
1058 map->sections_nb_alloc);
1060 map->sections[map->sections_nb] = *section;
1061 memory_region_ref(section->mr);
1062 return map->sections_nb++;
1065 static void phys_section_destroy(MemoryRegion *mr)
1067 bool have_sub_page = mr->subpage;
1069 memory_region_unref(mr);
1071 if (have_sub_page) {
1072 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1073 object_unref(OBJECT(&subpage->iomem));
1074 g_free(subpage);
1078 static void phys_sections_free(PhysPageMap *map)
1080 while (map->sections_nb > 0) {
1081 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1082 phys_section_destroy(section->mr);
1084 g_free(map->sections);
1085 g_free(map->nodes);
1088 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1090 subpage_t *subpage;
1091 hwaddr base = section->offset_within_address_space
1092 & TARGET_PAGE_MASK;
1093 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1094 d->map.nodes, d->map.sections);
1095 MemoryRegionSection subsection = {
1096 .offset_within_address_space = base,
1097 .size = int128_make64(TARGET_PAGE_SIZE),
1099 hwaddr start, end;
1101 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1103 if (!(existing->mr->subpage)) {
1104 subpage = subpage_init(d->as, base);
1105 subsection.address_space = d->as;
1106 subsection.mr = &subpage->iomem;
1107 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1108 phys_section_add(&d->map, &subsection));
1109 } else {
1110 subpage = container_of(existing->mr, subpage_t, iomem);
1112 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1113 end = start + int128_get64(section->size) - 1;
1114 subpage_register(subpage, start, end,
1115 phys_section_add(&d->map, section));
1119 static void register_multipage(AddressSpaceDispatch *d,
1120 MemoryRegionSection *section)
1122 hwaddr start_addr = section->offset_within_address_space;
1123 uint16_t section_index = phys_section_add(&d->map, section);
1124 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1125 TARGET_PAGE_BITS));
1127 assert(num_pages);
1128 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1131 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1133 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1134 AddressSpaceDispatch *d = as->next_dispatch;
1135 MemoryRegionSection now = *section, remain = *section;
1136 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1138 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1139 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1140 - now.offset_within_address_space;
1142 now.size = int128_min(int128_make64(left), now.size);
1143 register_subpage(d, &now);
1144 } else {
1145 now.size = int128_zero();
1147 while (int128_ne(remain.size, now.size)) {
1148 remain.size = int128_sub(remain.size, now.size);
1149 remain.offset_within_address_space += int128_get64(now.size);
1150 remain.offset_within_region += int128_get64(now.size);
1151 now = remain;
1152 if (int128_lt(remain.size, page_size)) {
1153 register_subpage(d, &now);
1154 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1155 now.size = page_size;
1156 register_subpage(d, &now);
1157 } else {
1158 now.size = int128_and(now.size, int128_neg(page_size));
1159 register_multipage(d, &now);
1164 void qemu_flush_coalesced_mmio_buffer(void)
1166 if (kvm_enabled())
1167 kvm_flush_coalesced_mmio_buffer();
1170 void qemu_mutex_lock_ramlist(void)
1172 qemu_mutex_lock(&ram_list.mutex);
1175 void qemu_mutex_unlock_ramlist(void)
1177 qemu_mutex_unlock(&ram_list.mutex);
1180 #ifdef __linux__
1182 #include <sys/vfs.h>
1184 #define HUGETLBFS_MAGIC 0x958458f6
1186 static long gethugepagesize(const char *path, Error **errp)
1188 struct statfs fs;
1189 int ret;
1191 do {
1192 ret = statfs(path, &fs);
1193 } while (ret != 0 && errno == EINTR);
1195 if (ret != 0) {
1196 error_setg_errno(errp, errno, "failed to get page size of file %s",
1197 path);
1198 return 0;
1201 return fs.f_bsize;
1204 static void *file_ram_alloc(RAMBlock *block,
1205 ram_addr_t memory,
1206 const char *path,
1207 Error **errp)
1209 struct stat st;
1210 char *filename;
1211 char *sanitized_name;
1212 char *c;
1213 void *area;
1214 int fd;
1215 uint64_t hpagesize;
1216 Error *local_err = NULL;
1218 hpagesize = gethugepagesize(path, &local_err);
1219 if (local_err) {
1220 error_propagate(errp, local_err);
1221 goto error;
1223 block->mr->align = hpagesize;
1225 if (memory < hpagesize) {
1226 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1227 "or larger than huge page size 0x%" PRIx64,
1228 memory, hpagesize);
1229 goto error;
1232 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1233 error_setg(errp,
1234 "host lacks kvm mmu notifiers, -mem-path unsupported");
1235 goto error;
1238 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1239 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1240 sanitized_name = g_strdup(memory_region_name(block->mr));
1241 for (c = sanitized_name; *c != '\0'; c++) {
1242 if (*c == '/') {
1243 *c = '_';
1247 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1248 sanitized_name);
1249 g_free(sanitized_name);
1251 fd = mkstemp(filename);
1252 if (fd >= 0) {
1253 unlink(filename);
1255 g_free(filename);
1256 } else {
1257 fd = open(path, O_RDWR | O_CREAT, 0644);
1260 if (fd < 0) {
1261 error_setg_errno(errp, errno,
1262 "unable to create backing store for hugepages");
1263 goto error;
1266 memory = ROUND_UP(memory, hpagesize);
    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
1274 if (ftruncate(fd, memory)) {
1275 perror("ftruncate");
1278 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1279 if (area == MAP_FAILED) {
1280 error_setg_errno(errp, errno,
1281 "unable to map backing store for hugepages");
1282 close(fd);
1283 goto error;
1286 if (mem_prealloc) {
1287 os_mem_prealloc(fd, area, memory);
1290 block->fd = fd;
1291 return area;
1293 error:
1294 return NULL;
1296 #endif
1298 /* Called with the ramlist lock held. */
1299 static ram_addr_t find_ram_offset(ram_addr_t size)
1301 RAMBlock *block, *next_block;
1302 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1304 assert(size != 0); /* it would hand out same offset multiple times */
1306 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1307 return 0;
1310 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1311 ram_addr_t end, next = RAM_ADDR_MAX;
1313 end = block->offset + block->max_length;
1315 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1316 if (next_block->offset >= end) {
1317 next = MIN(next, next_block->offset);
1320 if (next - end >= size && next - end < mingap) {
1321 offset = end;
1322 mingap = next - end;
1326 if (offset == RAM_ADDR_MAX) {
1327 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1328 (uint64_t)size);
1329 abort();
1332 return offset;
1335 ram_addr_t last_ram_offset(void)
1337 RAMBlock *block;
1338 ram_addr_t last = 0;
1340 rcu_read_lock();
1341 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1342 last = MAX(last, block->offset + block->max_length);
1344 rcu_read_unlock();
1345 return last;
1348 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1350 int ret;
1352 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1353 if (!machine_dump_guest_core(current_machine)) {
1354 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1355 if (ret) {
1356 perror("qemu_madvise");
1357 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1358 "but dump_guest_core=off specified\n");
/* Called within an RCU critical section, or while the ramlist lock
 * is held.
 */
1366 static RAMBlock *find_ram_block(ram_addr_t addr)
1368 RAMBlock *block;
1370 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1371 if (block->offset == addr) {
1372 return block;
1376 return NULL;
1379 const char *qemu_ram_get_idstr(RAMBlock *rb)
1381 return rb->idstr;
1384 /* Called with iothread lock held. */
1385 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1387 RAMBlock *new_block, *block;
1389 rcu_read_lock();
1390 new_block = find_ram_block(addr);
1391 assert(new_block);
1392 assert(!new_block->idstr[0]);
1394 if (dev) {
1395 char *id = qdev_get_dev_path(dev);
1396 if (id) {
1397 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1398 g_free(id);
1401 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1403 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1404 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1405 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1406 new_block->idstr);
1407 abort();
1410 rcu_read_unlock();
1413 /* Called with iothread lock held. */
1414 void qemu_ram_unset_idstr(ram_addr_t addr)
1416 RAMBlock *block;
    /* FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */
1423 rcu_read_lock();
1424 block = find_ram_block(addr);
1425 if (block) {
1426 memset(block->idstr, 0, sizeof(block->idstr));
1428 rcu_read_unlock();
1431 static int memory_try_enable_merging(void *addr, size_t len)
1433 if (!machine_mem_merge(current_machine)) {
1434 /* disabled by the user */
1435 return 0;
1438 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
/* Only legal before guest might have detected the memory size: e.g. on
 * incoming migration, or right after reset.
 *
 * As the memory core doesn't know how the memory is accessed, it is up to
 * the resize callback to update device state and/or add assertions to detect
 * misuse, if necessary.
 */
1448 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1450 RAMBlock *block = find_ram_block(base);
1452 assert(block);
1454 newsize = HOST_PAGE_ALIGN(newsize);
1456 if (block->used_length == newsize) {
1457 return 0;
1460 if (!(block->flags & RAM_RESIZEABLE)) {
1461 error_setg_errno(errp, EINVAL,
1462 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1463 " in != 0x" RAM_ADDR_FMT, block->idstr,
1464 newsize, block->used_length);
1465 return -EINVAL;
1468 if (block->max_length < newsize) {
1469 error_setg_errno(errp, EINVAL,
1470 "Length too large: %s: 0x" RAM_ADDR_FMT
1471 " > 0x" RAM_ADDR_FMT, block->idstr,
1472 newsize, block->max_length);
1473 return -EINVAL;
1476 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1477 block->used_length = newsize;
1478 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1479 DIRTY_CLIENTS_ALL);
1480 memory_region_set_size(block->mr, newsize);
1481 if (block->resized) {
1482 block->resized(block->idstr, newsize, block->host);
1484 return 0;
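/* Example (illustrative): incoming migration can shrink or grow the used
 * length of a block created with qemu_ram_alloc_resizeable():
 *
 *     Error *err = NULL;
 *     qemu_ram_resize(block->offset, newsize, &err);
 *     if (err) {
 *         error_report_err(err);
 *     }
 *
 * Fixed-size blocks fail with -EINVAL as above.
 */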
1487 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1489 RAMBlock *block;
1490 RAMBlock *last_block = NULL;
1491 ram_addr_t old_ram_size, new_ram_size;
1493 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1495 qemu_mutex_lock_ramlist();
1496 new_block->offset = find_ram_offset(new_block->max_length);
1498 if (!new_block->host) {
1499 if (xen_enabled()) {
1500 xen_ram_alloc(new_block->offset, new_block->max_length,
1501 new_block->mr);
1502 } else {
1503 new_block->host = phys_mem_alloc(new_block->max_length,
1504 &new_block->mr->align);
1505 if (!new_block->host) {
1506 error_setg_errno(errp, errno,
1507 "cannot set up guest memory '%s'",
1508 memory_region_name(new_block->mr));
1509 qemu_mutex_unlock_ramlist();
1510 return -1;
1512 memory_try_enable_merging(new_block->host, new_block->max_length);
1516 new_ram_size = MAX(old_ram_size,
1517 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1518 if (new_ram_size > old_ram_size) {
1519 migration_bitmap_extend(old_ram_size, new_ram_size);
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
1525 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1526 last_block = block;
1527 if (block->max_length < new_block->max_length) {
1528 break;
1531 if (block) {
1532 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1533 } else if (last_block) {
1534 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1535 } else { /* list is empty */
1536 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1538 ram_list.mru_block = NULL;
1540 /* Write list before version */
1541 smp_wmb();
1542 ram_list.version++;
1543 qemu_mutex_unlock_ramlist();
1545 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1547 if (new_ram_size > old_ram_size) {
1548 int i;
1550 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1551 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1552 ram_list.dirty_memory[i] =
1553 bitmap_zero_extend(ram_list.dirty_memory[i],
1554 old_ram_size, new_ram_size);
1557 cpu_physical_memory_set_dirty_range(new_block->offset,
1558 new_block->used_length,
1559 DIRTY_CLIENTS_ALL);
1561 if (new_block->host) {
1562 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1563 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1564 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1565 if (kvm_enabled()) {
1566 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1570 return new_block->offset;
1573 #ifdef __linux__
1574 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1575 bool share, const char *mem_path,
1576 Error **errp)
1578 RAMBlock *new_block;
1579 ram_addr_t addr;
1580 Error *local_err = NULL;
1582 if (xen_enabled()) {
1583 error_setg(errp, "-mem-path not supported with Xen");
1584 return -1;
1587 if (phys_mem_alloc != qemu_anon_ram_alloc) {
        /*
         * file_ram_alloc() needs to allocate just like
         * phys_mem_alloc, but we haven't bothered to provide
         * a hook there.
         */
1593 error_setg(errp,
1594 "-mem-path not supported with this accelerator");
1595 return -1;
1598 size = HOST_PAGE_ALIGN(size);
1599 new_block = g_malloc0(sizeof(*new_block));
1600 new_block->mr = mr;
1601 new_block->used_length = size;
1602 new_block->max_length = size;
1603 new_block->flags = share ? RAM_SHARED : 0;
1604 new_block->flags |= RAM_FILE;
1605 new_block->host = file_ram_alloc(new_block, size,
1606 mem_path, errp);
1607 if (!new_block->host) {
1608 g_free(new_block);
1609 return -1;
1612 addr = ram_block_add(new_block, &local_err);
1613 if (local_err) {
1614 g_free(new_block);
1615 error_propagate(errp, local_err);
1616 return -1;
1618 return addr;
1620 #endif
1622 static
1623 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1624 void (*resized)(const char*,
1625 uint64_t length,
1626 void *host),
1627 void *host, bool resizeable,
1628 MemoryRegion *mr, Error **errp)
1630 RAMBlock *new_block;
1631 ram_addr_t addr;
1632 Error *local_err = NULL;
1634 size = HOST_PAGE_ALIGN(size);
1635 max_size = HOST_PAGE_ALIGN(max_size);
1636 new_block = g_malloc0(sizeof(*new_block));
1637 new_block->mr = mr;
1638 new_block->resized = resized;
1639 new_block->used_length = size;
1640 new_block->max_length = max_size;
1641 assert(max_size >= size);
1642 new_block->fd = -1;
1643 new_block->host = host;
1644 if (host) {
1645 new_block->flags |= RAM_PREALLOC;
1647 if (resizeable) {
1648 new_block->flags |= RAM_RESIZEABLE;
1650 addr = ram_block_add(new_block, &local_err);
1651 if (local_err) {
1652 g_free(new_block);
1653 error_propagate(errp, local_err);
1654 return -1;
1656 return addr;
1659 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1660 MemoryRegion *mr, Error **errp)
1662 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1665 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1667 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1670 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1671 void (*resized)(const char*,
1672 uint64_t length,
1673 void *host),
1674 MemoryRegion *mr, Error **errp)
1676 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
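/* Example (illustrative): a board or device model backing a MemoryRegion
 * with guest RAM ends up in one of the helpers above, roughly:
 *
 *     Error *err = NULL;
 *     ram_addr_t offset = qemu_ram_alloc(size, mr, &err);
 *
 * qemu_ram_alloc_from_ptr() covers pre-existing host buffers, and
 * qemu_ram_alloc_resizeable() covers blocks whose used_length may change
 * later (see qemu_ram_resize() above).
 */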
1679 void qemu_ram_free_from_ptr(ram_addr_t addr)
1681 RAMBlock *block;
1683 qemu_mutex_lock_ramlist();
1684 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1685 if (addr == block->offset) {
1686 QLIST_REMOVE_RCU(block, next);
1687 ram_list.mru_block = NULL;
1688 /* Write list before version */
1689 smp_wmb();
1690 ram_list.version++;
1691 g_free_rcu(block, rcu);
1692 break;
1695 qemu_mutex_unlock_ramlist();
1698 static void reclaim_ramblock(RAMBlock *block)
1700 if (block->flags & RAM_PREALLOC) {
1702 } else if (xen_enabled()) {
1703 xen_invalidate_map_cache_entry(block->host);
1704 #ifndef _WIN32
1705 } else if (block->fd >= 0) {
1706 if (block->flags & RAM_FILE) {
1707 qemu_ram_munmap(block->host, block->max_length);
1708 } else {
1709 munmap(block->host, block->max_length);
1711 close(block->fd);
1712 #endif
1713 } else {
1714 qemu_anon_ram_free(block->host, block->max_length);
1716 g_free(block);
1719 void qemu_ram_free(ram_addr_t addr)
1721 RAMBlock *block;
1723 qemu_mutex_lock_ramlist();
1724 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1725 if (addr == block->offset) {
1726 QLIST_REMOVE_RCU(block, next);
1727 ram_list.mru_block = NULL;
1728 /* Write list before version */
1729 smp_wmb();
1730 ram_list.version++;
1731 call_rcu(block, reclaim_ramblock, rcu);
1732 break;
1735 qemu_mutex_unlock_ramlist();
1738 #ifndef _WIN32
1739 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1741 RAMBlock *block;
1742 ram_addr_t offset;
1743 int flags;
1744 void *area, *vaddr;
1746 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1747 offset = addr - block->offset;
1748 if (offset < block->max_length) {
1749 vaddr = ramblock_ptr(block, offset);
1750 if (block->flags & RAM_PREALLOC) {
1752 } else if (xen_enabled()) {
1753 abort();
1754 } else {
1755 flags = MAP_FIXED;
1756 if (block->fd >= 0) {
1757 flags |= (block->flags & RAM_SHARED ?
1758 MAP_SHARED : MAP_PRIVATE);
1759 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1760 flags, block->fd, offset);
1761 } else {
                /*
                 * Remap needs to match alloc.  Accelerators that
                 * set phys_mem_alloc never remap.  If they did,
                 * we'd need a remap hook here.
                 */
1767 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1769 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1770 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1771 flags, -1, 0);
1773 if (area != vaddr) {
1774 fprintf(stderr, "Could not remap addr: "
1775 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1776 length, addr);
1777 exit(1);
1779 memory_try_enable_merging(vaddr, length);
1780 qemu_ram_setup_dump(vaddr, length);
1785 #endif /* !_WIN32 */
1787 int qemu_get_ram_fd(ram_addr_t addr)
1789 RAMBlock *block;
1790 int fd;
1792 rcu_read_lock();
1793 block = qemu_get_ram_block(addr);
1794 fd = block->fd;
1795 rcu_read_unlock();
1796 return fd;
1799 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1801 RAMBlock *block;
1802 void *ptr;
1804 rcu_read_lock();
1805 block = qemu_get_ram_block(addr);
1806 ptr = ramblock_ptr(block, 0);
1807 rcu_read_unlock();
1808 return ptr;
/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
1822 void *qemu_get_ram_ptr(ram_addr_t addr)
1824 RAMBlock *block;
1825 void *ptr;
1827 rcu_read_lock();
1828 block = qemu_get_ram_block(addr);
1830 if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
1835 if (block->offset == 0) {
1836 ptr = xen_map_cache(addr, 0, 0);
1837 goto unlock;
1840 block->host = xen_map_cache(block->offset, block->max_length, 1);
1842 ptr = ramblock_ptr(block, addr - block->offset);
1844 unlock:
1845 rcu_read_unlock();
1846 return ptr;
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
1858 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1860 void *ptr;
1861 if (*size == 0) {
1862 return NULL;
1864 if (xen_enabled()) {
1865 return xen_map_cache(addr, *size, 1);
1866 } else {
1867 RAMBlock *block;
1868 rcu_read_lock();
1869 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1870 if (addr - block->offset < block->max_length) {
1871 if (addr - block->offset + *size > block->max_length)
1872 *size = block->max_length - addr + block->offset;
1873 ptr = ramblock_ptr(block, addr - block->offset);
1874 rcu_read_unlock();
1875 return ptr;
1879 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1880 abort();
/*
 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
 * in that RAMBlock.
 *
 * ptr: Host pointer to look up
 * round_offset: If true round the result offset down to a page boundary
 * *ram_addr: set to result ram_addr
 * *offset: set to result offset within the RAMBlock
 *
 * Returns: RAMBlock (or NULL if not found)
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
1901 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1902 ram_addr_t *ram_addr,
1903 ram_addr_t *offset)
1905 RAMBlock *block;
1906 uint8_t *host = ptr;
1908 if (xen_enabled()) {
1909 rcu_read_lock();
1910 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1911 block = qemu_get_ram_block(*ram_addr);
1912 if (block) {
1913 *offset = (host - block->host);
1915 rcu_read_unlock();
1916 return block;
1919 rcu_read_lock();
1920 block = atomic_rcu_read(&ram_list.mru_block);
1921 if (block && block->host && host - block->host < block->max_length) {
1922 goto found;
1925 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        /* This can happen when the block is not mapped. */
1927 if (block->host == NULL) {
1928 continue;
1930 if (host - block->host < block->max_length) {
1931 goto found;
1935 rcu_read_unlock();
1936 return NULL;
1938 found:
1939 *offset = (host - block->host);
1940 if (round_offset) {
1941 *offset &= TARGET_PAGE_MASK;
1943 *ram_addr = block->offset + *offset;
1944 rcu_read_unlock();
1945 return block;
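/* Example (illustrative): mapping a host pointer (e.g. taken from a TLB
 * entry) back to its RAM block:
 *
 *     ram_addr_t ram_addr, offset;
 *     RAMBlock *rb = qemu_ram_block_from_host(host_ptr, true, &ram_addr, &offset);
 *     if (rb) {
 *         ... offset is rounded down to a page boundary here because
 *             round_offset was true ...
 *     }
 */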
/*
 * Finds the named RAMBlock
 *
 * name: The name of RAMBlock to find
 *
 * Returns: RAMBlock (or NULL if not found)
 */
1955 RAMBlock *qemu_ram_block_by_name(const char *name)
1957 RAMBlock *block;
1959 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1960 if (!strcmp(name, block->idstr)) {
1961 return block;
1965 return NULL;
1968 /* Some of the softmmu routines need to translate from a host pointer
1969 (typically a TLB entry) back to a ram offset. */
1970 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1972 RAMBlock *block;
1973 ram_addr_t offset; /* Not used */
1975 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
1977 if (!block) {
1978 return NULL;
1981 return block->mr;
1984 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1985 uint64_t val, unsigned size)
1987 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1988 tb_invalidate_phys_page_fast(ram_addr, size);
1990 switch (size) {
1991 case 1:
1992 stb_p(qemu_get_ram_ptr(ram_addr), val);
1993 break;
1994 case 2:
1995 stw_p(qemu_get_ram_ptr(ram_addr), val);
1996 break;
1997 case 4:
1998 stl_p(qemu_get_ram_ptr(ram_addr), val);
1999 break;
2000 default:
2001 abort();
    /* Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
2006 cpu_physical_memory_set_dirty_range(ram_addr, size,
2007 DIRTY_CLIENTS_NOCODE);
2008 /* we remove the notdirty callback only if the code has been
2009 flushed */
2010 if (!cpu_physical_memory_is_clean(ram_addr)) {
2011 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2015 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2016 unsigned size, bool is_write)
2018 return is_write;
2021 static const MemoryRegionOps notdirty_mem_ops = {
2022 .write = notdirty_mem_write,
2023 .valid.accepts = notdirty_mem_accepts,
2024 .endianness = DEVICE_NATIVE_ENDIAN,
2027 /* Generate a debug exception if a watchpoint has been hit. */
2028 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2030 CPUState *cpu = current_cpu;
2031 CPUArchState *env = cpu->env_ptr;
2032 target_ulong pc, cs_base;
2033 target_ulong vaddr;
2034 CPUWatchpoint *wp;
2035 int cpu_flags;
2037 if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB.  Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
2041 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2042 return;
2044 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2045 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2046 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2047 && (wp->flags & flags)) {
2048 if (flags == BP_MEM_READ) {
2049 wp->flags |= BP_WATCHPOINT_HIT_READ;
2050 } else {
2051 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2053 wp->hitaddr = vaddr;
2054 wp->hitattrs = attrs;
2055 if (!cpu->watchpoint_hit) {
2056 cpu->watchpoint_hit = wp;
2057 tb_check_watchpoint(cpu);
2058 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2059 cpu->exception_index = EXCP_DEBUG;
2060 cpu_loop_exit(cpu);
2061 } else {
2062 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2063 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2064 cpu_resume_from_signal(cpu, NULL);
2067 } else {
2068 wp->flags &= ~BP_WATCHPOINT_HIT;
2073 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2074 so these check for a hit then pass through to the normal out-of-line
2075 phys routines. */
2076 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2077 unsigned size, MemTxAttrs attrs)
2079 MemTxResult res;
2080 uint64_t data;
2082 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2083 switch (size) {
2084 case 1:
2085 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2086 break;
2087 case 2:
2088 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2089 break;
2090 case 4:
2091 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2092 break;
2093 default: abort();
2095 *pdata = data;
2096 return res;
2099 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2100 uint64_t val, unsigned size,
2101 MemTxAttrs attrs)
2103 MemTxResult res;
2105 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2106 switch (size) {
2107 case 1:
2108 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2109 break;
2110 case 2:
2111 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2112 break;
2113 case 4:
2114 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2115 break;
2116 default: abort();
2118 return res;
2121 static const MemoryRegionOps watch_mem_ops = {
2122 .read_with_attrs = watch_mem_read,
2123 .write_with_attrs = watch_mem_write,
2124 .endianness = DEVICE_NATIVE_ENDIAN,
2127 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2128 unsigned len, MemTxAttrs attrs)
2130 subpage_t *subpage = opaque;
2131 uint8_t buf[8];
2132 MemTxResult res;
2134 #if defined(DEBUG_SUBPAGE)
2135 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2136 subpage, len, addr);
2137 #endif
2138 res = address_space_read(subpage->as, addr + subpage->base,
2139 attrs, buf, len);
2140 if (res) {
2141 return res;
2143 switch (len) {
2144 case 1:
2145 *data = ldub_p(buf);
2146 return MEMTX_OK;
2147 case 2:
2148 *data = lduw_p(buf);
2149 return MEMTX_OK;
2150 case 4:
2151 *data = ldl_p(buf);
2152 return MEMTX_OK;
2153 case 8:
2154 *data = ldq_p(buf);
2155 return MEMTX_OK;
2156 default:
2157 abort();
2161 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2162 uint64_t value, unsigned len, MemTxAttrs attrs)
2164 subpage_t *subpage = opaque;
2165 uint8_t buf[8];
2167 #if defined(DEBUG_SUBPAGE)
2168 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2169 " value %"PRIx64"\n",
2170 __func__, subpage, len, addr, value);
2171 #endif
2172 switch (len) {
2173 case 1:
2174 stb_p(buf, value);
2175 break;
2176 case 2:
2177 stw_p(buf, value);
2178 break;
2179 case 4:
2180 stl_p(buf, value);
2181 break;
2182 case 8:
2183 stq_p(buf, value);
2184 break;
2185 default:
2186 abort();
2188 return address_space_write(subpage->as, addr + subpage->base,
2189 attrs, buf, len);
2192 static bool subpage_accepts(void *opaque, hwaddr addr,
2193 unsigned len, bool is_write)
2195 subpage_t *subpage = opaque;
2196 #if defined(DEBUG_SUBPAGE)
2197 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2198 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2199 #endif
2201 return address_space_access_valid(subpage->as, addr + subpage->base,
2202 len, is_write);
2205 static const MemoryRegionOps subpage_ops = {
2206 .read_with_attrs = subpage_read,
2207 .write_with_attrs = subpage_write,
2208 .impl.min_access_size = 1,
2209 .impl.max_access_size = 8,
2210 .valid.min_access_size = 1,
2211 .valid.max_access_size = 8,
2212 .valid.accepts = subpage_accepts,
2213 .endianness = DEVICE_NATIVE_ENDIAN,
2214 };
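/* Associate the page-offset range [start, end] of 'mmio' with the given
 * section index. Returns 0 on success, -1 if the range does not fit in
 * one target page.
 */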
2216 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2217 uint16_t section)
2219 int idx, eidx;
2221 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2222 return -1;
2223 idx = SUBPAGE_IDX(start);
2224 eidx = SUBPAGE_IDX(end);
2225 #if defined(DEBUG_SUBPAGE)
2226 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2227 __func__, mmio, start, end, idx, eidx, section);
2228 #endif
2229 for (; idx <= eidx; idx++) {
2230 mmio->sub_section[idx] = section;
2233 return 0;
2234 }
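/* Allocate a subpage for the target page starting at 'base' in address
 * space 'as'; the whole page initially points at the unassigned section.
 */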
2236 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2238 subpage_t *mmio;
2240 mmio = g_malloc0(sizeof(subpage_t));
2242 mmio->as = as;
2243 mmio->base = base;
2244 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2245 NULL, TARGET_PAGE_SIZE);
2246 mmio->iomem.subpage = true;
2247 #if defined(DEBUG_SUBPAGE)
2248 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2249 mmio, base, TARGET_PAGE_SIZE);
2250 #endif
2251 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2253 return mmio;
2256 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2257 MemoryRegion *mr)
2259 assert(as);
2260 MemoryRegionSection section = {
2261 .address_space = as,
2262 .mr = mr,
2263 .offset_within_address_space = 0,
2264 .offset_within_region = 0,
2265 .size = int128_2_64(),
2266 };
2268 return phys_section_add(map, &section);
2269 }
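/* Resolve an IOTLB section index back to its MemoryRegion via the CPU's
 * current memory dispatch; only the in-page bits of 'index' are used.
 */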
2271 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2273 CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2274 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2275 MemoryRegionSection *sections = d->map.sections;
2277 return sections[index & ~TARGET_PAGE_MASK].mr;
2280 static void io_mem_init(void)
2282 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2283 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2284 "unassigned", UINT64_MAX);
2285 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2286 NULL, UINT64_MAX);
2287 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2288 NULL, UINT64_MAX);
2291 MemoryRegion *get_unassigned_memory(void)
2293 return &io_mem_unassigned;
2294 }
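/* Start of a memory topology update: build a fresh AddressSpaceDispatch and
 * pre-register the fixed sections so that their indexes match the
 * PHYS_SECTION_UNASSIGNED/NOTDIRTY/ROM/WATCH constants.
 */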
2296 static void mem_begin(MemoryListener *listener)
2298 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2299 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2300 uint16_t n;
2302 n = dummy_section(&d->map, as, &io_mem_unassigned);
2303 assert(n == PHYS_SECTION_UNASSIGNED);
2304 n = dummy_section(&d->map, as, &io_mem_notdirty);
2305 assert(n == PHYS_SECTION_NOTDIRTY);
2306 n = dummy_section(&d->map, as, &io_mem_rom);
2307 assert(n == PHYS_SECTION_ROM);
2308 n = dummy_section(&d->map, as, &io_mem_watch);
2309 assert(n == PHYS_SECTION_WATCH);
2311 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2312 d->as = as;
2313 as->next_dispatch = d;
2316 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2318 phys_sections_free(&d->map);
2319 g_free(d);
2320 }
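/* End of a memory topology update: compact the new dispatch tree and
 * publish it with RCU; the previous dispatch is freed after a grace period.
 */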
2322 static void mem_commit(MemoryListener *listener)
2324 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2325 AddressSpaceDispatch *cur = as->dispatch;
2326 AddressSpaceDispatch *next = as->next_dispatch;
2328 phys_page_compact_all(next, next->map.nodes_nb);
2330 atomic_rcu_set(&as->dispatch, next);
2331 if (cur) {
2332 call_rcu(cur, address_space_dispatch_free, rcu);
2336 static void tcg_commit(MemoryListener *listener)
2338 CPUAddressSpace *cpuas;
2339 AddressSpaceDispatch *d;
2341 /* Since each CPU stores RAM addresses in its TLB cache, we must
2342 reset the modified entries. */
2343 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2344 cpu_reloading_memory_map();
2345 /* The CPU and TLB are protected by the iothread lock.
2346 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2347 * may have split the RCU critical section.
2348 */
2349 d = atomic_rcu_read(&cpuas->as->dispatch);
2350 cpuas->memory_dispatch = d;
2351 tlb_flush(cpuas->cpu, 1);
2354 void address_space_init_dispatch(AddressSpace *as)
2356 as->dispatch = NULL;
2357 as->dispatch_listener = (MemoryListener) {
2358 .begin = mem_begin,
2359 .commit = mem_commit,
2360 .region_add = mem_add,
2361 .region_nop = mem_add,
2362 .priority = 0,
2363 };
2364 memory_listener_register(&as->dispatch_listener, as);
2367 void address_space_unregister(AddressSpace *as)
2369 memory_listener_unregister(&as->dispatch_listener);
2372 void address_space_destroy_dispatch(AddressSpace *as)
2374 AddressSpaceDispatch *d = as->dispatch;
2376 atomic_rcu_set(&as->dispatch, NULL);
2377 if (d) {
2378 call_rcu(d, address_space_dispatch_free, rcu);
2382 static void memory_map_init(void)
2384 system_memory = g_malloc(sizeof(*system_memory));
2386 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2387 address_space_init(&address_space_memory, system_memory, "memory");
2389 system_io = g_malloc(sizeof(*system_io));
2390 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2391 65536);
2392 address_space_init(&address_space_io, system_io, "I/O");
2395 MemoryRegion *get_system_memory(void)
2397 return system_memory;
2400 MemoryRegion *get_system_io(void)
2402 return system_io;
2405 #endif /* !defined(CONFIG_USER_ONLY) */
2407 /* physical memory access (slow version, mainly for debug) */
2408 #if defined(CONFIG_USER_ONLY)
2409 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2410 uint8_t *buf, int len, int is_write)
2412 int l, flags;
2413 target_ulong page;
2414 void * p;
2416 while (len > 0) {
2417 page = addr & TARGET_PAGE_MASK;
2418 l = (page + TARGET_PAGE_SIZE) - addr;
2419 if (l > len)
2420 l = len;
2421 flags = page_get_flags(page);
2422 if (!(flags & PAGE_VALID))
2423 return -1;
2424 if (is_write) {
2425 if (!(flags & PAGE_WRITE))
2426 return -1;
2427 /* XXX: this code should not depend on lock_user */
2428 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2429 return -1;
2430 memcpy(p, buf, l);
2431 unlock_user(p, addr, l);
2432 } else {
2433 if (!(flags & PAGE_READ))
2434 return -1;
2435 /* XXX: this code should not depend on lock_user */
2436 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2437 return -1;
2438 memcpy(buf, p, l);
2439 unlock_user(p, addr, 0);
2441 len -= l;
2442 buf += l;
2443 addr += l;
2445 return 0;
2448 #else
2450 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2451 hwaddr length)
2453 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2454 /* No early return if dirty_log_mask is or becomes 0, because
2455 * cpu_physical_memory_set_dirty_range will still call
2456 * xen_modified_memory.
2457 */
2458 if (dirty_log_mask) {
2459 dirty_log_mask =
2460 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2462 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2463 tb_invalidate_phys_range(addr, addr + length);
2464 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2466 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2467 }
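/* Clamp an access of 'l' bytes at 'addr' to something the region can
 * handle: no wider than its maximum access size, no wider than the
 * alignment of 'addr' for regions without unaligned support, and rounded
 * down to a power of two.
 */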
2469 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2471 unsigned access_size_max = mr->ops->valid.max_access_size;
2473 /* Regions are assumed to support 1-4 byte accesses unless
2474 otherwise specified. */
2475 if (access_size_max == 0) {
2476 access_size_max = 4;
2479 /* Bound the maximum access by the alignment of the address. */
2480 if (!mr->ops->impl.unaligned) {
2481 unsigned align_size_max = addr & -addr;
2482 if (align_size_max != 0 && align_size_max < access_size_max) {
2483 access_size_max = align_size_max;
2487 /* Don't attempt accesses larger than the maximum. */
2488 if (l > access_size_max) {
2489 l = access_size_max;
2491 l = pow2floor(l);
2493 return l;
2494 }
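/* Take the iothread lock if the region needs the global lock and it is not
 * already held, and flush any coalesced MMIO. Returns true if the caller
 * must release the lock again after the access.
 */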
2496 static bool prepare_mmio_access(MemoryRegion *mr)
2498 bool unlocked = !qemu_mutex_iothread_locked();
2499 bool release_lock = false;
2501 if (unlocked && mr->global_locking) {
2502 qemu_mutex_lock_iothread();
2503 unlocked = false;
2504 release_lock = true;
2506 if (mr->flush_coalesced_mmio) {
2507 if (unlocked) {
2508 qemu_mutex_lock_iothread();
2510 qemu_flush_coalesced_mmio_buffer();
2511 if (unlocked) {
2512 qemu_mutex_unlock_iothread();
2516 return release_lock;
2517 }
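/* Copy 'len' bytes between 'buf' and guest physical memory at 'addr',
 * one translated section at a time. Direct RAM is accessed with memcpy;
 * everything else is dispatched to the region at the widest access size
 * it accepts.
 */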
2519 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2520 uint8_t *buf, int len, bool is_write)
2522 hwaddr l;
2523 uint8_t *ptr;
2524 uint64_t val;
2525 hwaddr addr1;
2526 MemoryRegion *mr;
2527 MemTxResult result = MEMTX_OK;
2528 bool release_lock = false;
2530 rcu_read_lock();
2531 while (len > 0) {
2532 l = len;
2533 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2535 if (is_write) {
2536 if (!memory_access_is_direct(mr, is_write)) {
2537 release_lock |= prepare_mmio_access(mr);
2538 l = memory_access_size(mr, l, addr1);
2539 /* XXX: could force current_cpu to NULL to avoid
2540 potential bugs */
2541 switch (l) {
2542 case 8:
2543 /* 64 bit write access */
2544 val = ldq_p(buf);
2545 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2546 attrs);
2547 break;
2548 case 4:
2549 /* 32 bit write access */
2550 val = ldl_p(buf);
2551 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2552 attrs);
2553 break;
2554 case 2:
2555 /* 16 bit write access */
2556 val = lduw_p(buf);
2557 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2558 attrs);
2559 break;
2560 case 1:
2561 /* 8 bit write access */
2562 val = ldub_p(buf);
2563 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2564 attrs);
2565 break;
2566 default:
2567 abort();
2569 } else {
2570 addr1 += memory_region_get_ram_addr(mr);
2571 /* RAM case */
2572 ptr = qemu_get_ram_ptr(addr1);
2573 memcpy(ptr, buf, l);
2574 invalidate_and_set_dirty(mr, addr1, l);
2576 } else {
2577 if (!memory_access_is_direct(mr, is_write)) {
2578 /* I/O case */
2579 release_lock |= prepare_mmio_access(mr);
2580 l = memory_access_size(mr, l, addr1);
2581 switch (l) {
2582 case 8:
2583 /* 64 bit read access */
2584 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2585 attrs);
2586 stq_p(buf, val);
2587 break;
2588 case 4:
2589 /* 32 bit read access */
2590 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2591 attrs);
2592 stl_p(buf, val);
2593 break;
2594 case 2:
2595 /* 16 bit read access */
2596 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2597 attrs);
2598 stw_p(buf, val);
2599 break;
2600 case 1:
2601 /* 8 bit read access */
2602 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2603 attrs);
2604 stb_p(buf, val);
2605 break;
2606 default:
2607 abort();
2609 } else {
2610 /* RAM case */
2611 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2612 memcpy(buf, ptr, l);
2616 if (release_lock) {
2617 qemu_mutex_unlock_iothread();
2618 release_lock = false;
2621 len -= l;
2622 buf += l;
2623 addr += l;
2625 rcu_read_unlock();
2627 return result;
2630 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2631 const uint8_t *buf, int len)
2633 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2636 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2637 uint8_t *buf, int len)
2639 return address_space_rw(as, addr, attrs, buf, len, false);
2643 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2644 int len, int is_write)
2646 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2647 buf, len, is_write);
2650 enum write_rom_type {
2651 WRITE_DATA,
2652 FLUSH_CACHE,
2653 };
2655 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2656 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2658 hwaddr l;
2659 uint8_t *ptr;
2660 hwaddr addr1;
2661 MemoryRegion *mr;
2663 rcu_read_lock();
2664 while (len > 0) {
2665 l = len;
2666 mr = address_space_translate(as, addr, &addr1, &l, true);
2668 if (!(memory_region_is_ram(mr) ||
2669 memory_region_is_romd(mr))) {
2670 l = memory_access_size(mr, l, addr1);
2671 } else {
2672 addr1 += memory_region_get_ram_addr(mr);
2673 /* ROM/RAM case */
2674 ptr = qemu_get_ram_ptr(addr1);
2675 switch (type) {
2676 case WRITE_DATA:
2677 memcpy(ptr, buf, l);
2678 invalidate_and_set_dirty(mr, addr1, l);
2679 break;
2680 case FLUSH_CACHE:
2681 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2682 break;
2685 len -= l;
2686 buf += l;
2687 addr += l;
2689 rcu_read_unlock();
2692 /* used for ROM loading : can write in RAM and ROM */
2693 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2694 const uint8_t *buf, int len)
2696 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2699 void cpu_flush_icache_range(hwaddr start, int len)
2700 {
2701 /*
2702 * This function should do the same thing as an icache flush that was
2703 * triggered from within the guest. For TCG we are always cache coherent,
2704 * so there is no need to flush anything. For KVM / Xen we need to flush
2705 * the host's instruction cache at least.
2706 */
2707 if (tcg_enabled()) {
2708 return;
2711 cpu_physical_memory_write_rom_internal(&address_space_memory,
2712 start, NULL, len, FLUSH_CACHE);
2715 typedef struct {
2716 MemoryRegion *mr;
2717 void *buffer;
2718 hwaddr addr;
2719 hwaddr len;
2720 bool in_use;
2721 } BounceBuffer;
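/* Fallback buffer used by address_space_map() when the target is not
 * directly accessible RAM; only one bounce mapping may be in use at a time.
 */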
2723 static BounceBuffer bounce;
2725 typedef struct MapClient {
2726 QEMUBH *bh;
2727 QLIST_ENTRY(MapClient) link;
2728 } MapClient;
2730 QemuMutex map_client_list_lock;
2731 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2732 = QLIST_HEAD_INITIALIZER(map_client_list);
2734 static void cpu_unregister_map_client_do(MapClient *client)
2736 QLIST_REMOVE(client, link);
2737 g_free(client);
2740 static void cpu_notify_map_clients_locked(void)
2742 MapClient *client;
2744 while (!QLIST_EMPTY(&map_client_list)) {
2745 client = QLIST_FIRST(&map_client_list);
2746 qemu_bh_schedule(client->bh);
2747 cpu_unregister_map_client_do(client);
2748 }
2749 }
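/* Register a bottom half to be scheduled once the bounce buffer is released
 * again; if the buffer is already free, pending clients are notified
 * immediately.
 */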
2751 void cpu_register_map_client(QEMUBH *bh)
2753 MapClient *client = g_malloc(sizeof(*client));
2755 qemu_mutex_lock(&map_client_list_lock);
2756 client->bh = bh;
2757 QLIST_INSERT_HEAD(&map_client_list, client, link);
2758 if (!atomic_read(&bounce.in_use)) {
2759 cpu_notify_map_clients_locked();
2761 qemu_mutex_unlock(&map_client_list_lock);
2764 void cpu_exec_init_all(void)
2766 qemu_mutex_init(&ram_list.mutex);
2767 io_mem_init();
2768 memory_map_init();
2769 qemu_mutex_init(&map_client_list_lock);
2772 void cpu_unregister_map_client(QEMUBH *bh)
2774 MapClient *client;
2776 qemu_mutex_lock(&map_client_list_lock);
2777 QLIST_FOREACH(client, &map_client_list, link) {
2778 if (client->bh == bh) {
2779 cpu_unregister_map_client_do(client);
2780 break;
2783 qemu_mutex_unlock(&map_client_list_lock);
2786 static void cpu_notify_map_clients(void)
2788 qemu_mutex_lock(&map_client_list_lock);
2789 cpu_notify_map_clients_locked();
2790 qemu_mutex_unlock(&map_client_list_lock);
2791 }
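/* Check that the whole range [addr, addr + len) can be accessed in the
 * given direction. Directly accessible RAM is always accepted; other
 * regions are queried with memory_region_access_valid().
 */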
2793 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2795 MemoryRegion *mr;
2796 hwaddr l, xlat;
2798 rcu_read_lock();
2799 while (len > 0) {
2800 l = len;
2801 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2802 if (!memory_access_is_direct(mr, is_write)) {
2803 l = memory_access_size(mr, l, addr);
2804 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2805 rcu_read_unlock();
2806 return false;
2807 }
2808 }
2809 len -= l;
2810 addr += l;
2812 rcu_read_unlock();
2813 return true;
2816 /* Map a physical memory region into a host virtual address.
2817 * May map a subset of the requested range, given by and returned in *plen.
2818 * May return NULL if resources needed to perform the mapping are exhausted.
2819 * Use only for reads OR writes - not for read-modify-write operations.
2820 * Use cpu_register_map_client() to know when retrying the map operation is
2821 * likely to succeed.
2822 */
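/* Typical usage (illustrative sketch only):
 *
 *     hwaddr plen = size;
 *     void *p = address_space_map(as, addr, &plen, is_write);
 *     if (p) {
 *         ... access at most plen bytes at p ...
 *         address_space_unmap(as, p, plen, is_write, plen);
 *     }
 */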
2823 void *address_space_map(AddressSpace *as,
2824 hwaddr addr,
2825 hwaddr *plen,
2826 bool is_write)
2828 hwaddr len = *plen;
2829 hwaddr done = 0;
2830 hwaddr l, xlat, base;
2831 MemoryRegion *mr, *this_mr;
2832 ram_addr_t raddr;
2834 if (len == 0) {
2835 return NULL;
2838 l = len;
2839 rcu_read_lock();
2840 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2842 if (!memory_access_is_direct(mr, is_write)) {
2843 if (atomic_xchg(&bounce.in_use, true)) {
2844 rcu_read_unlock();
2845 return NULL;
2847 /* Avoid unbounded allocations */
2848 l = MIN(l, TARGET_PAGE_SIZE);
2849 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2850 bounce.addr = addr;
2851 bounce.len = l;
2853 memory_region_ref(mr);
2854 bounce.mr = mr;
2855 if (!is_write) {
2856 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2857 bounce.buffer, l);
2860 rcu_read_unlock();
2861 *plen = l;
2862 return bounce.buffer;
2865 base = xlat;
2866 raddr = memory_region_get_ram_addr(mr);
2868 for (;;) {
2869 len -= l;
2870 addr += l;
2871 done += l;
2872 if (len == 0) {
2873 break;
2876 l = len;
2877 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2878 if (this_mr != mr || xlat != base + done) {
2879 break;
2883 memory_region_ref(mr);
2884 rcu_read_unlock();
2885 *plen = done;
2886 return qemu_ram_ptr_length(raddr + base, plen);
2889 /* Unmaps a memory region previously mapped by address_space_map().
2890 * Will also mark the memory as dirty if is_write == 1. access_len gives
2891 * the amount of memory that was actually read or written by the caller.
2892 */
2893 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2894 int is_write, hwaddr access_len)
2896 if (buffer != bounce.buffer) {
2897 MemoryRegion *mr;
2898 ram_addr_t addr1;
2900 mr = qemu_ram_addr_from_host(buffer, &addr1);
2901 assert(mr != NULL);
2902 if (is_write) {
2903 invalidate_and_set_dirty(mr, addr1, access_len);
2905 if (xen_enabled()) {
2906 xen_invalidate_map_cache_entry(buffer);
2908 memory_region_unref(mr);
2909 return;
2911 if (is_write) {
2912 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2913 bounce.buffer, access_len);
2915 qemu_vfree(bounce.buffer);
2916 bounce.buffer = NULL;
2917 memory_region_unref(bounce.mr);
2918 atomic_mb_set(&bounce.in_use, false);
2919 cpu_notify_map_clients();
2922 void *cpu_physical_memory_map(hwaddr addr,
2923 hwaddr *plen,
2924 int is_write)
2926 return address_space_map(&address_space_memory, addr, plen, is_write);
2929 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2930 int is_write, hwaddr access_len)
2932 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2935 /* warning: addr must be aligned */
2936 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2937 MemTxAttrs attrs,
2938 MemTxResult *result,
2939 enum device_endian endian)
2941 uint8_t *ptr;
2942 uint64_t val;
2943 MemoryRegion *mr;
2944 hwaddr l = 4;
2945 hwaddr addr1;
2946 MemTxResult r;
2947 bool release_lock = false;
2949 rcu_read_lock();
2950 mr = address_space_translate(as, addr, &addr1, &l, false);
2951 if (l < 4 || !memory_access_is_direct(mr, false)) {
2952 release_lock |= prepare_mmio_access(mr);
2954 /* I/O case */
2955 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2956 #if defined(TARGET_WORDS_BIGENDIAN)
2957 if (endian == DEVICE_LITTLE_ENDIAN) {
2958 val = bswap32(val);
2960 #else
2961 if (endian == DEVICE_BIG_ENDIAN) {
2962 val = bswap32(val);
2964 #endif
2965 } else {
2966 /* RAM case */
2967 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2968 & TARGET_PAGE_MASK)
2969 + addr1);
2970 switch (endian) {
2971 case DEVICE_LITTLE_ENDIAN:
2972 val = ldl_le_p(ptr);
2973 break;
2974 case DEVICE_BIG_ENDIAN:
2975 val = ldl_be_p(ptr);
2976 break;
2977 default:
2978 val = ldl_p(ptr);
2979 break;
2981 r = MEMTX_OK;
2983 if (result) {
2984 *result = r;
2986 if (release_lock) {
2987 qemu_mutex_unlock_iothread();
2989 rcu_read_unlock();
2990 return val;
2993 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2994 MemTxAttrs attrs, MemTxResult *result)
2996 return address_space_ldl_internal(as, addr, attrs, result,
2997 DEVICE_NATIVE_ENDIAN);
3000 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3001 MemTxAttrs attrs, MemTxResult *result)
3003 return address_space_ldl_internal(as, addr, attrs, result,
3004 DEVICE_LITTLE_ENDIAN);
3007 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3008 MemTxAttrs attrs, MemTxResult *result)
3010 return address_space_ldl_internal(as, addr, attrs, result,
3011 DEVICE_BIG_ENDIAN);
3014 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3016 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3019 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3021 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3024 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3026 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3029 /* warning: addr must be aligned */
3030 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3031 MemTxAttrs attrs,
3032 MemTxResult *result,
3033 enum device_endian endian)
3035 uint8_t *ptr;
3036 uint64_t val;
3037 MemoryRegion *mr;
3038 hwaddr l = 8;
3039 hwaddr addr1;
3040 MemTxResult r;
3041 bool release_lock = false;
3043 rcu_read_lock();
3044 mr = address_space_translate(as, addr, &addr1, &l,
3045 false);
3046 if (l < 8 || !memory_access_is_direct(mr, false)) {
3047 release_lock |= prepare_mmio_access(mr);
3049 /* I/O case */
3050 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3051 #if defined(TARGET_WORDS_BIGENDIAN)
3052 if (endian == DEVICE_LITTLE_ENDIAN) {
3053 val = bswap64(val);
3055 #else
3056 if (endian == DEVICE_BIG_ENDIAN) {
3057 val = bswap64(val);
3059 #endif
3060 } else {
3061 /* RAM case */
3062 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3063 & TARGET_PAGE_MASK)
3064 + addr1);
3065 switch (endian) {
3066 case DEVICE_LITTLE_ENDIAN:
3067 val = ldq_le_p(ptr);
3068 break;
3069 case DEVICE_BIG_ENDIAN:
3070 val = ldq_be_p(ptr);
3071 break;
3072 default:
3073 val = ldq_p(ptr);
3074 break;
3076 r = MEMTX_OK;
3078 if (result) {
3079 *result = r;
3081 if (release_lock) {
3082 qemu_mutex_unlock_iothread();
3084 rcu_read_unlock();
3085 return val;
3088 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3089 MemTxAttrs attrs, MemTxResult *result)
3091 return address_space_ldq_internal(as, addr, attrs, result,
3092 DEVICE_NATIVE_ENDIAN);
3095 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3096 MemTxAttrs attrs, MemTxResult *result)
3098 return address_space_ldq_internal(as, addr, attrs, result,
3099 DEVICE_LITTLE_ENDIAN);
3102 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3103 MemTxAttrs attrs, MemTxResult *result)
3105 return address_space_ldq_internal(as, addr, attrs, result,
3106 DEVICE_BIG_ENDIAN);
3109 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3111 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3114 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3116 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3119 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3121 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3124 /* XXX: optimize */
3125 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3126 MemTxAttrs attrs, MemTxResult *result)
3128 uint8_t val;
3129 MemTxResult r;
3131 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3132 if (result) {
3133 *result = r;
3135 return val;
3138 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3140 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3143 /* warning: addr must be aligned */
3144 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3145 hwaddr addr,
3146 MemTxAttrs attrs,
3147 MemTxResult *result,
3148 enum device_endian endian)
3150 uint8_t *ptr;
3151 uint64_t val;
3152 MemoryRegion *mr;
3153 hwaddr l = 2;
3154 hwaddr addr1;
3155 MemTxResult r;
3156 bool release_lock = false;
3158 rcu_read_lock();
3159 mr = address_space_translate(as, addr, &addr1, &l,
3160 false);
3161 if (l < 2 || !memory_access_is_direct(mr, false)) {
3162 release_lock |= prepare_mmio_access(mr);
3164 /* I/O case */
3165 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3166 #if defined(TARGET_WORDS_BIGENDIAN)
3167 if (endian == DEVICE_LITTLE_ENDIAN) {
3168 val = bswap16(val);
3170 #else
3171 if (endian == DEVICE_BIG_ENDIAN) {
3172 val = bswap16(val);
3174 #endif
3175 } else {
3176 /* RAM case */
3177 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3178 & TARGET_PAGE_MASK)
3179 + addr1);
3180 switch (endian) {
3181 case DEVICE_LITTLE_ENDIAN:
3182 val = lduw_le_p(ptr);
3183 break;
3184 case DEVICE_BIG_ENDIAN:
3185 val = lduw_be_p(ptr);
3186 break;
3187 default:
3188 val = lduw_p(ptr);
3189 break;
3191 r = MEMTX_OK;
3193 if (result) {
3194 *result = r;
3196 if (release_lock) {
3197 qemu_mutex_unlock_iothread();
3199 rcu_read_unlock();
3200 return val;
3203 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3204 MemTxAttrs attrs, MemTxResult *result)
3206 return address_space_lduw_internal(as, addr, attrs, result,
3207 DEVICE_NATIVE_ENDIAN);
3210 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3211 MemTxAttrs attrs, MemTxResult *result)
3213 return address_space_lduw_internal(as, addr, attrs, result,
3214 DEVICE_LITTLE_ENDIAN);
3217 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3218 MemTxAttrs attrs, MemTxResult *result)
3220 return address_space_lduw_internal(as, addr, attrs, result,
3221 DEVICE_BIG_ENDIAN);
3224 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3226 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3229 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3231 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3234 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3236 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3239 /* warning: addr must be aligned. The RAM page is not marked as dirty
3240 and the code inside is not invalidated. It is useful if the dirty
3241 bits are used to track modified PTEs. */
3242 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3243 MemTxAttrs attrs, MemTxResult *result)
3245 uint8_t *ptr;
3246 MemoryRegion *mr;
3247 hwaddr l = 4;
3248 hwaddr addr1;
3249 MemTxResult r;
3250 uint8_t dirty_log_mask;
3251 bool release_lock = false;
3253 rcu_read_lock();
3254 mr = address_space_translate(as, addr, &addr1, &l,
3255 true);
3256 if (l < 4 || !memory_access_is_direct(mr, true)) {
3257 release_lock |= prepare_mmio_access(mr);
3259 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3260 } else {
3261 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3262 ptr = qemu_get_ram_ptr(addr1);
3263 stl_p(ptr, val);
3265 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3266 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3267 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3268 r = MEMTX_OK;
3270 if (result) {
3271 *result = r;
3273 if (release_lock) {
3274 qemu_mutex_unlock_iothread();
3276 rcu_read_unlock();
3279 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3281 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3284 /* warning: addr must be aligned */
3285 static inline void address_space_stl_internal(AddressSpace *as,
3286 hwaddr addr, uint32_t val,
3287 MemTxAttrs attrs,
3288 MemTxResult *result,
3289 enum device_endian endian)
3291 uint8_t *ptr;
3292 MemoryRegion *mr;
3293 hwaddr l = 4;
3294 hwaddr addr1;
3295 MemTxResult r;
3296 bool release_lock = false;
3298 rcu_read_lock();
3299 mr = address_space_translate(as, addr, &addr1, &l,
3300 true);
3301 if (l < 4 || !memory_access_is_direct(mr, true)) {
3302 release_lock |= prepare_mmio_access(mr);
3304 #if defined(TARGET_WORDS_BIGENDIAN)
3305 if (endian == DEVICE_LITTLE_ENDIAN) {
3306 val = bswap32(val);
3308 #else
3309 if (endian == DEVICE_BIG_ENDIAN) {
3310 val = bswap32(val);
3312 #endif
3313 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3314 } else {
3315 /* RAM case */
3316 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3317 ptr = qemu_get_ram_ptr(addr1);
3318 switch (endian) {
3319 case DEVICE_LITTLE_ENDIAN:
3320 stl_le_p(ptr, val);
3321 break;
3322 case DEVICE_BIG_ENDIAN:
3323 stl_be_p(ptr, val);
3324 break;
3325 default:
3326 stl_p(ptr, val);
3327 break;
3329 invalidate_and_set_dirty(mr, addr1, 4);
3330 r = MEMTX_OK;
3332 if (result) {
3333 *result = r;
3335 if (release_lock) {
3336 qemu_mutex_unlock_iothread();
3338 rcu_read_unlock();
3341 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3342 MemTxAttrs attrs, MemTxResult *result)
3344 address_space_stl_internal(as, addr, val, attrs, result,
3345 DEVICE_NATIVE_ENDIAN);
3348 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3349 MemTxAttrs attrs, MemTxResult *result)
3351 address_space_stl_internal(as, addr, val, attrs, result,
3352 DEVICE_LITTLE_ENDIAN);
3355 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3356 MemTxAttrs attrs, MemTxResult *result)
3358 address_space_stl_internal(as, addr, val, attrs, result,
3359 DEVICE_BIG_ENDIAN);
3362 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3364 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3367 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3369 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3372 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3374 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3377 /* XXX: optimize */
3378 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3379 MemTxAttrs attrs, MemTxResult *result)
3381 uint8_t v = val;
3382 MemTxResult r;
3384 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3385 if (result) {
3386 *result = r;
3390 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3392 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3395 /* warning: addr must be aligned */
3396 static inline void address_space_stw_internal(AddressSpace *as,
3397 hwaddr addr, uint32_t val,
3398 MemTxAttrs attrs,
3399 MemTxResult *result,
3400 enum device_endian endian)
3402 uint8_t *ptr;
3403 MemoryRegion *mr;
3404 hwaddr l = 2;
3405 hwaddr addr1;
3406 MemTxResult r;
3407 bool release_lock = false;
3409 rcu_read_lock();
3410 mr = address_space_translate(as, addr, &addr1, &l, true);
3411 if (l < 2 || !memory_access_is_direct(mr, true)) {
3412 release_lock |= prepare_mmio_access(mr);
3414 #if defined(TARGET_WORDS_BIGENDIAN)
3415 if (endian == DEVICE_LITTLE_ENDIAN) {
3416 val = bswap16(val);
3418 #else
3419 if (endian == DEVICE_BIG_ENDIAN) {
3420 val = bswap16(val);
3422 #endif
3423 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3424 } else {
3425 /* RAM case */
3426 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3427 ptr = qemu_get_ram_ptr(addr1);
3428 switch (endian) {
3429 case DEVICE_LITTLE_ENDIAN:
3430 stw_le_p(ptr, val);
3431 break;
3432 case DEVICE_BIG_ENDIAN:
3433 stw_be_p(ptr, val);
3434 break;
3435 default:
3436 stw_p(ptr, val);
3437 break;
3439 invalidate_and_set_dirty(mr, addr1, 2);
3440 r = MEMTX_OK;
3442 if (result) {
3443 *result = r;
3445 if (release_lock) {
3446 qemu_mutex_unlock_iothread();
3448 rcu_read_unlock();
3451 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3452 MemTxAttrs attrs, MemTxResult *result)
3454 address_space_stw_internal(as, addr, val, attrs, result,
3455 DEVICE_NATIVE_ENDIAN);
3458 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3459 MemTxAttrs attrs, MemTxResult *result)
3461 address_space_stw_internal(as, addr, val, attrs, result,
3462 DEVICE_LITTLE_ENDIAN);
3465 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3466 MemTxAttrs attrs, MemTxResult *result)
3468 address_space_stw_internal(as, addr, val, attrs, result,
3469 DEVICE_BIG_ENDIAN);
3472 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3474 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3477 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3479 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3482 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3484 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3487 /* XXX: optimize */
3488 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3489 MemTxAttrs attrs, MemTxResult *result)
3491 MemTxResult r;
3492 val = tswap64(val);
3493 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3494 if (result) {
3495 *result = r;
3499 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3500 MemTxAttrs attrs, MemTxResult *result)
3502 MemTxResult r;
3503 val = cpu_to_le64(val);
3504 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3505 if (result) {
3506 *result = r;
3509 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3510 MemTxAttrs attrs, MemTxResult *result)
3512 MemTxResult r;
3513 val = cpu_to_be64(val);
3514 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3515 if (result) {
3516 *result = r;
3520 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3522 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3525 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3527 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3530 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3532 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3535 /* virtual memory access for debug (includes writing to ROM) */
3536 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3537 uint8_t *buf, int len, int is_write)
3539 int l;
3540 hwaddr phys_addr;
3541 target_ulong page;
3543 while (len > 0) {
3544 page = addr & TARGET_PAGE_MASK;
3545 phys_addr = cpu_get_phys_page_debug(cpu, page);
3546 /* if no physical page mapped, return an error */
3547 if (phys_addr == -1)
3548 return -1;
3549 l = (page + TARGET_PAGE_SIZE) - addr;
3550 if (l > len)
3551 l = len;
3552 phys_addr += (addr & ~TARGET_PAGE_MASK);
3553 if (is_write) {
3554 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3555 } else {
3556 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3557 buf, l, 0);
3559 len -= l;
3560 buf += l;
3561 addr += l;
3563 return 0;
3564 }
3566 /*
3567 * Allows code that needs to deal with migration bitmaps etc to still be built
3568 * target independent.
3569 */
3570 size_t qemu_target_page_bits(void)
3571 {
3572 return TARGET_PAGE_BITS;
3573 }
3575 #endif
3577 /*
3578 * A helper function for the _utterly broken_ virtio device model to find out if
3579 * it's running on a big endian machine. Don't do this at home kids!
3580 */
3581 bool target_words_bigendian(void);
3582 bool target_words_bigendian(void)
3584 #if defined(TARGET_WORDS_BIGENDIAN)
3585 return true;
3586 #else
3587 return false;
3588 #endif
3591 #ifndef CONFIG_USER_ONLY
3592 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3594 MemoryRegion*mr;
3595 hwaddr l = 1;
3596 bool res;
3598 rcu_read_lock();
3599 mr = address_space_translate(&address_space_memory,
3600 phys_addr, &phys_addr, &l, false);
3602 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3603 rcu_read_unlock();
3604 return res;
3607 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3609 RAMBlock *block;
3610 int ret = 0;
3612 rcu_read_lock();
3613 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3614 ret = func(block->idstr, block->host, block->offset,
3615 block->used_length, opaque);
3616 if (ret) {
3617 break;
3620 rcu_read_unlock();
3621 return ret;
3622 }
3623 #endif