[qemu/ar7.git] / exec.c
blob 02ec8ee500aa04c390b403e535457e5976da7918
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
58 #include "qemu/range.h"
59 #ifndef _WIN32
60 #include "qemu/mmap-alloc.h"
61 #endif
63 //#define DEBUG_SUBPAGE
65 #if !defined(CONFIG_USER_ONLY)
66 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
67 * are protected by the ramlist lock.
69 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
71 static MemoryRegion *system_memory;
72 static MemoryRegion *system_io;
74 AddressSpace address_space_io;
75 AddressSpace address_space_memory;
77 MemoryRegion io_mem_rom, io_mem_notdirty;
78 static MemoryRegion io_mem_unassigned;
80 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
81 #define RAM_PREALLOC (1 << 0)
83 /* RAM is mmap-ed with MAP_SHARED */
84 #define RAM_SHARED (1 << 1)
86 /* Only a portion of RAM (used_length) is actually used and migrated.
87 * This used_length size can change across reboots.
89 #define RAM_RESIZEABLE (1 << 2)
91 /* RAM is backed by an mmapped file.
93 #define RAM_FILE (1 << 3)
94 #endif
96 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
97 /* current CPU in the current thread. It is only valid inside
98 cpu_exec() */
99 __thread CPUState *current_cpu;
100 /* 0 = Do not count executed instructions.
101 1 = Precise instruction counting.
102 2 = Adaptive rate instruction counting. */
103 int use_icount;
105 #if !defined(CONFIG_USER_ONLY)
107 typedef struct PhysPageEntry PhysPageEntry;
109 struct PhysPageEntry {
110 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
111 uint32_t skip : 6;
112 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
113 uint32_t ptr : 26;
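/* All-ones value of the 26-bit 'ptr' field, reserved as a "no node" marker. */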
116 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
118 /* Size of the L2 (and L3, etc) page tables. */
119 #define ADDR_SPACE_BITS 64
121 #define P_L2_BITS 9
122 #define P_L2_SIZE (1 << P_L2_BITS)
124 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
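/* With 4 KiB target pages (TARGET_PAGE_BITS == 12) this gives six levels of
 * 512-entry nodes, enough to cover the full 64-bit address space.
 */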
126 typedef PhysPageEntry Node[P_L2_SIZE];
128 typedef struct PhysPageMap {
129 struct rcu_head rcu;
131 unsigned sections_nb;
132 unsigned sections_nb_alloc;
133 unsigned nodes_nb;
134 unsigned nodes_nb_alloc;
135 Node *nodes;
136 MemoryRegionSection *sections;
137 } PhysPageMap;
139 struct AddressSpaceDispatch {
140 struct rcu_head rcu;
142 /* This is a multi-level map on the physical address space.
143 * The bottom level has pointers to MemoryRegionSections.
145 PhysPageEntry phys_map;
146 PhysPageMap map;
147 AddressSpace *as;
150 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
151 typedef struct subpage_t {
152 MemoryRegion iomem;
153 AddressSpace *as;
154 hwaddr base;
155 uint16_t sub_section[TARGET_PAGE_SIZE];
156 } subpage_t;
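/* The sub_section[] array above maps each byte offset within the page to the
 * index of the MemoryRegionSection handling it; see subpage_register() below.
 */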
158 #define PHYS_SECTION_UNASSIGNED 0
159 #define PHYS_SECTION_NOTDIRTY 1
160 #define PHYS_SECTION_ROM 2
161 #define PHYS_SECTION_WATCH 3
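/* These fixed section indices match the dummy_section() registration order
 * used in mem_begin() below, which asserts that they line up.
 */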
163 static void io_mem_init(void);
164 static void memory_map_init(void);
165 static void tcg_commit(MemoryListener *listener);
167 static MemoryRegion io_mem_watch;
170 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
171 * @cpu: the CPU whose AddressSpace this is
172 * @as: the AddressSpace itself
173 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
174 * @tcg_as_listener: listener for tracking changes to the AddressSpace
176 struct CPUAddressSpace {
177 CPUState *cpu;
178 AddressSpace *as;
179 struct AddressSpaceDispatch *memory_dispatch;
180 MemoryListener tcg_as_listener;
183 #endif
185 #if !defined(CONFIG_USER_ONLY)
187 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
189 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
190 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
191 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
192 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
196 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
198 unsigned i;
199 uint32_t ret;
200 PhysPageEntry e;
201 PhysPageEntry *p;
203 ret = map->nodes_nb++;
204 p = map->nodes[ret];
205 assert(ret != PHYS_MAP_NODE_NIL);
206 assert(ret != map->nodes_nb_alloc);
208 e.skip = leaf ? 0 : 1;
209 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
210 for (i = 0; i < P_L2_SIZE; ++i) {
211 memcpy(&p[i], &e, sizeof(e));
213 return ret;
216 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
217 hwaddr *index, hwaddr *nb, uint16_t leaf,
218 int level)
220 PhysPageEntry *p;
221 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
223 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
224 lp->ptr = phys_map_node_alloc(map, level == 0);
226 p = map->nodes[lp->ptr];
227 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
229 while (*nb && lp < &p[P_L2_SIZE]) {
230 if ((*index & (step - 1)) == 0 && *nb >= step) {
231 lp->skip = 0;
232 lp->ptr = leaf;
233 *index += step;
234 *nb -= step;
235 } else {
236 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
238 ++lp;
242 static void phys_page_set(AddressSpaceDispatch *d,
243 hwaddr index, hwaddr nb,
244 uint16_t leaf)
246 /* Wildly overreserve - it doesn't matter much. */
247 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
249 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
252 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
253 * and update our entry so we can skip it and go directly to the destination.
255 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
257 unsigned valid_ptr = P_L2_SIZE;
258 int valid = 0;
259 PhysPageEntry *p;
260 int i;
262 if (lp->ptr == PHYS_MAP_NODE_NIL) {
263 return;
266 p = nodes[lp->ptr];
267 for (i = 0; i < P_L2_SIZE; i++) {
268 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
269 continue;
272 valid_ptr = i;
273 valid++;
274 if (p[i].skip) {
275 phys_page_compact(&p[i], nodes, compacted);
279 /* We can only compress if there's only one child. */
280 if (valid != 1) {
281 return;
284 assert(valid_ptr < P_L2_SIZE);
286 /* Don't compress if it won't fit in the # of bits we have. */
287 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
288 return;
291 lp->ptr = p[valid_ptr].ptr;
292 if (!p[valid_ptr].skip) {
293 /* If our only child is a leaf, make this a leaf. */
294 /* By design, we should have made this node a leaf to begin with so we
295 * should never reach here.
296 * But since it's so simple to handle this, let's do it just in case we
297 * change this rule.
299 lp->skip = 0;
300 } else {
301 lp->skip += p[valid_ptr].skip;
305 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
307 DECLARE_BITMAP(compacted, nodes_nb);
309 if (d->phys_map.skip) {
310 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
314 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
315 Node *nodes, MemoryRegionSection *sections)
317 PhysPageEntry *p;
318 hwaddr index = addr >> TARGET_PAGE_BITS;
319 int i;
321 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
322 if (lp.ptr == PHYS_MAP_NODE_NIL) {
323 return &sections[PHYS_SECTION_UNASSIGNED];
325 p = nodes[lp.ptr];
326 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
329 if (sections[lp.ptr].size.hi ||
330 range_covers_byte(sections[lp.ptr].offset_within_address_space,
331 sections[lp.ptr].size.lo, addr)) {
332 return &sections[lp.ptr];
333 } else {
334 return &sections[PHYS_SECTION_UNASSIGNED];
338 bool memory_region_is_unassigned(MemoryRegion *mr)
340 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
341 && mr != &io_mem_watch;
344 /* Called from RCU critical section */
345 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
346 hwaddr addr,
347 bool resolve_subpage)
349 MemoryRegionSection *section;
350 subpage_t *subpage;
352 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
353 if (resolve_subpage && section->mr->subpage) {
354 subpage = container_of(section->mr, subpage_t, iomem);
355 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
357 return section;
360 /* Called from RCU critical section */
361 static MemoryRegionSection *
362 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
363 hwaddr *plen, bool resolve_subpage)
365 MemoryRegionSection *section;
366 MemoryRegion *mr;
367 Int128 diff;
369 section = address_space_lookup_region(d, addr, resolve_subpage);
370 /* Compute offset within MemoryRegionSection */
371 addr -= section->offset_within_address_space;
373 /* Compute offset within MemoryRegion */
374 *xlat = addr + section->offset_within_region;
376 mr = section->mr;
378 /* MMIO registers can be expected to perform full-width accesses based only
379 * on their address, without considering adjacent registers that could
380 * decode to completely different MemoryRegions. When such registers
381 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
382 * regions overlap wildly. For this reason we cannot clamp the accesses
383 * here.
385 * If the length is small (as is the case for address_space_ldl/stl),
386 * everything works fine. If the incoming length is large, however,
387 * the caller really has to do the clamping through memory_access_size.
389 if (memory_region_is_ram(mr)) {
390 diff = int128_sub(section->size, int128_make64(addr));
391 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
393 return section;
396 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
398 if (memory_region_is_ram(mr)) {
399 return !(is_write && mr->readonly);
401 if (memory_region_is_romd(mr)) {
402 return !is_write;
405 return false;
408 /* Called from RCU critical section */
409 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
410 hwaddr *xlat, hwaddr *plen,
411 bool is_write)
413 IOMMUTLBEntry iotlb;
414 MemoryRegionSection *section;
415 MemoryRegion *mr;
417 for (;;) {
418 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
419 section = address_space_translate_internal(d, addr, &addr, plen, true);
420 mr = section->mr;
422 if (!mr->iommu_ops) {
423 break;
426 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
427 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
428 | (addr & iotlb.addr_mask));
429 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
430 if (!(iotlb.perm & (1 << is_write))) {
431 mr = &io_mem_unassigned;
432 break;
435 as = iotlb.target_as;
438 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
439 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
440 *plen = MIN(page, *plen);
443 *xlat = addr;
444 return mr;
447 /* Called from RCU critical section */
448 MemoryRegionSection *
449 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
450 hwaddr *xlat, hwaddr *plen)
452 MemoryRegionSection *section;
453 section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
454 addr, xlat, plen, false);
456 assert(!section->mr->iommu_ops);
457 return section;
459 #endif
461 #if !defined(CONFIG_USER_ONLY)
463 static int cpu_common_post_load(void *opaque, int version_id)
465 CPUState *cpu = opaque;
467 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
468 version_id is increased. */
469 cpu->interrupt_request &= ~0x01;
470 tlb_flush(cpu, 1);
472 return 0;
475 static int cpu_common_pre_load(void *opaque)
477 CPUState *cpu = opaque;
479 cpu->exception_index = -1;
481 return 0;
484 static bool cpu_common_exception_index_needed(void *opaque)
486 CPUState *cpu = opaque;
488 return tcg_enabled() && cpu->exception_index != -1;
491 static const VMStateDescription vmstate_cpu_common_exception_index = {
492 .name = "cpu_common/exception_index",
493 .version_id = 1,
494 .minimum_version_id = 1,
495 .needed = cpu_common_exception_index_needed,
496 .fields = (VMStateField[]) {
497 VMSTATE_INT32(exception_index, CPUState),
498 VMSTATE_END_OF_LIST()
502 static bool cpu_common_crash_occurred_needed(void *opaque)
504 CPUState *cpu = opaque;
506 return cpu->crash_occurred;
509 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
510 .name = "cpu_common/crash_occurred",
511 .version_id = 1,
512 .minimum_version_id = 1,
513 .needed = cpu_common_crash_occurred_needed,
514 .fields = (VMStateField[]) {
515 VMSTATE_BOOL(crash_occurred, CPUState),
516 VMSTATE_END_OF_LIST()
520 const VMStateDescription vmstate_cpu_common = {
521 .name = "cpu_common",
522 .version_id = 1,
523 .minimum_version_id = 1,
524 .pre_load = cpu_common_pre_load,
525 .post_load = cpu_common_post_load,
526 .fields = (VMStateField[]) {
527 VMSTATE_UINT32(halted, CPUState),
528 VMSTATE_UINT32(interrupt_request, CPUState),
529 VMSTATE_END_OF_LIST()
531 .subsections = (const VMStateDescription*[]) {
532 &vmstate_cpu_common_exception_index,
533 &vmstate_cpu_common_crash_occurred,
534 NULL
538 #endif
540 CPUState *qemu_get_cpu(int index)
542 CPUState *cpu;
544 CPU_FOREACH(cpu) {
545 if (cpu->cpu_index == index) {
546 return cpu;
550 return NULL;
553 #if !defined(CONFIG_USER_ONLY)
554 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
556 /* We only support one address space per cpu at the moment. */
557 assert(cpu->as == as);
559 if (cpu->cpu_ases) {
560 /* We've already registered the listener for our only AS */
561 return;
564 cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
565 cpu->cpu_ases[0].cpu = cpu;
566 cpu->cpu_ases[0].as = as;
567 cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
568 memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
570 #endif
572 #ifndef CONFIG_USER_ONLY
573 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
575 static int cpu_get_free_index(Error **errp)
577 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
579 if (cpu >= MAX_CPUMASK_BITS) {
580 error_setg(errp, "Trying to use more CPUs than max of %d",
581 MAX_CPUMASK_BITS);
582 return -1;
585 bitmap_set(cpu_index_map, cpu, 1);
586 return cpu;
589 void cpu_exec_exit(CPUState *cpu)
591 if (cpu->cpu_index == -1) {
592 /* cpu_index was never allocated by this @cpu or was already freed. */
593 return;
596 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
597 cpu->cpu_index = -1;
599 #else
601 static int cpu_get_free_index(Error **errp)
603 CPUState *some_cpu;
604 int cpu_index = 0;
606 CPU_FOREACH(some_cpu) {
607 cpu_index++;
609 return cpu_index;
612 void cpu_exec_exit(CPUState *cpu)
615 #endif
617 void cpu_exec_init(CPUState *cpu, Error **errp)
619 CPUClass *cc = CPU_GET_CLASS(cpu);
620 int cpu_index;
621 Error *local_err = NULL;
623 #ifdef TARGET_WORDS_BIGENDIAN
624 cpu->bigendian = true;
625 #else
626 cpu->bigendian = false;
627 #endif
629 #ifndef CONFIG_USER_ONLY
630 cpu->as = &address_space_memory;
631 cpu->thread_id = qemu_get_thread_id();
632 #endif
634 #if defined(CONFIG_USER_ONLY)
635 cpu_list_lock();
636 #endif
637 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
638 if (local_err) {
639 error_propagate(errp, local_err);
640 #if defined(CONFIG_USER_ONLY)
641 cpu_list_unlock();
642 #endif
643 return;
645 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
646 #if defined(CONFIG_USER_ONLY)
647 cpu_list_unlock();
648 #endif
649 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
650 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
652 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
653 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
654 cpu_save, cpu_load, cpu->env_ptr);
655 assert(cc->vmsd == NULL);
656 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
657 #endif
658 if (cc->vmsd != NULL) {
659 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
663 #if defined(CONFIG_USER_ONLY)
664 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
666 tb_invalidate_phys_page_range(pc, pc + 1, 0);
668 #else
669 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
671 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
672 if (phys != -1) {
673 tb_invalidate_phys_addr(cpu->as,
674 phys | (pc & ~TARGET_PAGE_MASK));
677 #endif
679 #if defined(CONFIG_USER_ONLY)
680 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
685 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
686 int flags)
688 return -ENOSYS;
691 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
695 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
696 int flags, CPUWatchpoint **watchpoint)
698 return -ENOSYS;
700 #else
701 /* Add a watchpoint. */
702 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
703 int flags, CPUWatchpoint **watchpoint)
705 CPUWatchpoint *wp;
707 /* forbid ranges which are empty or run off the end of the address space */
708 if (len == 0 || (addr + len - 1) < addr) {
709 error_report("tried to set invalid watchpoint at %"
710 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
711 return -EINVAL;
713 wp = g_malloc(sizeof(*wp));
715 wp->vaddr = addr;
716 wp->len = len;
717 wp->flags = flags;
719 /* keep all GDB-injected watchpoints in front */
720 if (flags & BP_GDB) {
721 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
722 } else {
723 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
726 tlb_flush_page(cpu, addr);
728 if (watchpoint)
729 *watchpoint = wp;
730 return 0;
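/* A debugger front end might, for example, request a 4-byte write watchpoint
 * with cpu_watchpoint_insert(cpu, addr, 4, BP_GDB | BP_MEM_WRITE, NULL) and
 * later drop it with cpu_watchpoint_remove(cpu, addr, 4, BP_GDB | BP_MEM_WRITE).
 */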
733 /* Remove a specific watchpoint. */
734 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
735 int flags)
737 CPUWatchpoint *wp;
739 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
740 if (addr == wp->vaddr && len == wp->len
741 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
742 cpu_watchpoint_remove_by_ref(cpu, wp);
743 return 0;
746 return -ENOENT;
749 /* Remove a specific watchpoint by reference. */
750 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
752 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
754 tlb_flush_page(cpu, watchpoint->vaddr);
756 g_free(watchpoint);
759 /* Remove all matching watchpoints. */
760 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
762 CPUWatchpoint *wp, *next;
764 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
765 if (wp->flags & mask) {
766 cpu_watchpoint_remove_by_ref(cpu, wp);
771 /* Return true if this watchpoint address matches the specified
772 * access (i.e. the address range covered by the watchpoint overlaps
773 * partially or completely with the address range covered by the
774 * access).
776 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
777 vaddr addr,
778 vaddr len)
780 /* We know the lengths are non-zero, but a little caution is
781 * required to avoid errors in the case where the range ends
782 * exactly at the top of the address space and so addr + len
783 * wraps round to zero.
785 vaddr wpend = wp->vaddr + wp->len - 1;
786 vaddr addrend = addr + len - 1;
788 return !(addr > wpend || wp->vaddr > addrend);
791 #endif
793 /* Add a breakpoint. */
794 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
795 CPUBreakpoint **breakpoint)
797 CPUBreakpoint *bp;
799 bp = g_malloc(sizeof(*bp));
801 bp->pc = pc;
802 bp->flags = flags;
804 /* keep all GDB-injected breakpoints in front */
805 if (flags & BP_GDB) {
806 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
807 } else {
808 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
811 breakpoint_invalidate(cpu, pc);
813 if (breakpoint) {
814 *breakpoint = bp;
816 return 0;
819 /* Remove a specific breakpoint. */
820 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
822 CPUBreakpoint *bp;
824 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
825 if (bp->pc == pc && bp->flags == flags) {
826 cpu_breakpoint_remove_by_ref(cpu, bp);
827 return 0;
830 return -ENOENT;
833 /* Remove a specific breakpoint by reference. */
834 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
836 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
838 breakpoint_invalidate(cpu, breakpoint->pc);
840 g_free(breakpoint);
843 /* Remove all matching breakpoints. */
844 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
846 CPUBreakpoint *bp, *next;
848 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
849 if (bp->flags & mask) {
850 cpu_breakpoint_remove_by_ref(cpu, bp);
855 /* enable or disable single step mode. EXCP_DEBUG is returned by the
856 CPU loop after each instruction */
857 void cpu_single_step(CPUState *cpu, int enabled)
859 if (cpu->singlestep_enabled != enabled) {
860 cpu->singlestep_enabled = enabled;
861 if (kvm_enabled()) {
862 kvm_update_guest_debug(cpu, 0);
863 } else {
864 /* must flush all the translated code to avoid inconsistencies */
865 /* XXX: only flush what is necessary */
866 tb_flush(cpu);
871 void QEMU_NORETURN cpu_abort(CPUState *cpu, const char *fmt, ...)
873 va_list ap;
874 va_list ap2;
876 va_start(ap, fmt);
877 va_copy(ap2, ap);
878 fprintf(stderr, "qemu: fatal: ");
879 vfprintf(stderr, fmt, ap);
880 fprintf(stderr, "\n");
881 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
882 if (qemu_log_enabled()) {
883 qemu_log("qemu: fatal: ");
884 qemu_log_vprintf(fmt, ap2);
885 qemu_log("\n");
886 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
887 qemu_log_flush();
888 qemu_log_close();
890 va_end(ap2);
891 va_end(ap);
892 replay_finish();
893 #if defined(CONFIG_USER_ONLY)
895 struct sigaction act;
896 sigfillset(&act.sa_mask);
897 act.sa_handler = SIG_DFL;
898 sigaction(SIGABRT, &act, NULL);
900 #endif
901 abort();
904 #if !defined(CONFIG_USER_ONLY)
905 /* Called from RCU critical section */
906 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
908 RAMBlock *block;
910 block = atomic_rcu_read(&ram_list.mru_block);
911 if (block && addr - block->offset < block->max_length) {
912 return block;
914 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
915 if (addr - block->offset < block->max_length) {
916 goto found;
920 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
921 abort();
923 found:
924 /* It is safe to write mru_block outside the iothread lock. This
925 * is what happens:
927 * mru_block = xxx
928 * rcu_read_unlock()
929 * xxx removed from list
930 * rcu_read_lock()
931 * read mru_block
932 * mru_block = NULL;
933 * call_rcu(reclaim_ramblock, xxx);
934 * rcu_read_unlock()
936 * atomic_rcu_set is not needed here. The block was already published
937 * when it was placed into the list. Here we're just making an extra
938 * copy of the pointer.
940 ram_list.mru_block = block;
941 return block;
944 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
946 CPUState *cpu;
947 ram_addr_t start1;
948 RAMBlock *block;
949 ram_addr_t end;
951 end = TARGET_PAGE_ALIGN(start + length);
952 start &= TARGET_PAGE_MASK;
954 rcu_read_lock();
955 block = qemu_get_ram_block(start);
956 assert(block == qemu_get_ram_block(end - 1));
957 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
958 CPU_FOREACH(cpu) {
959 tlb_reset_dirty(cpu, start1, length);
961 rcu_read_unlock();
964 /* Note: start and end must be within the same ram block. */
965 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
966 ram_addr_t length,
967 unsigned client)
969 unsigned long end, page;
970 bool dirty;
972 if (length == 0) {
973 return false;
976 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
977 page = start >> TARGET_PAGE_BITS;
978 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
979 page, end - page);
981 if (dirty && tcg_enabled()) {
982 tlb_reset_dirty_range_all(start, length);
985 return dirty;
988 /* Called from RCU critical section */
989 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
990 MemoryRegionSection *section,
991 target_ulong vaddr,
992 hwaddr paddr, hwaddr xlat,
993 int prot,
994 target_ulong *address)
996 hwaddr iotlb;
997 CPUWatchpoint *wp;
999 if (memory_region_is_ram(section->mr)) {
1000 /* Normal RAM. */
1001 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1002 + xlat;
1003 if (!section->readonly) {
1004 iotlb |= PHYS_SECTION_NOTDIRTY;
1005 } else {
1006 iotlb |= PHYS_SECTION_ROM;
1008 } else {
1009 AddressSpaceDispatch *d;
1011 d = atomic_rcu_read(&section->address_space->dispatch);
1012 iotlb = section - d->map.sections;
1013 iotlb += xlat;
1016 /* Make accesses to pages with watchpoints go via the
1017 watchpoint trap routines. */
1018 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1019 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1020 /* Avoid trapping reads of pages with a write breakpoint. */
1021 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1022 iotlb = PHYS_SECTION_WATCH + paddr;
1023 *address |= TLB_MMIO;
1024 break;
1029 return iotlb;
1031 #endif /* defined(CONFIG_USER_ONLY) */
1033 #if !defined(CONFIG_USER_ONLY)
1035 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1036 uint16_t section);
1037 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1039 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1040 qemu_anon_ram_alloc;
1043 * Set a custom physical guest memory allocator.
1044 * Accelerators with unusual needs may need this. Hopefully, we can
1045 * get rid of it eventually.
1047 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1049 phys_mem_alloc = alloc;
1052 static uint16_t phys_section_add(PhysPageMap *map,
1053 MemoryRegionSection *section)
1055 /* The physical section number is ORed with a page-aligned
1056 * pointer to produce the iotlb entries. Thus it should
1057 * never overflow into the page-aligned value.
1059 assert(map->sections_nb < TARGET_PAGE_SIZE);
1061 if (map->sections_nb == map->sections_nb_alloc) {
1062 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1063 map->sections = g_renew(MemoryRegionSection, map->sections,
1064 map->sections_nb_alloc);
1066 map->sections[map->sections_nb] = *section;
1067 memory_region_ref(section->mr);
1068 return map->sections_nb++;
1071 static void phys_section_destroy(MemoryRegion *mr)
1073 memory_region_unref(mr);
1075 if (mr->subpage) {
1076 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1077 object_unref(OBJECT(&subpage->iomem));
1078 g_free(subpage);
1082 static void phys_sections_free(PhysPageMap *map)
1084 while (map->sections_nb > 0) {
1085 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1086 phys_section_destroy(section->mr);
1088 g_free(map->sections);
1089 g_free(map->nodes);
1092 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1094 subpage_t *subpage;
1095 hwaddr base = section->offset_within_address_space
1096 & TARGET_PAGE_MASK;
1097 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1098 d->map.nodes, d->map.sections);
1099 MemoryRegionSection subsection = {
1100 .offset_within_address_space = base,
1101 .size = int128_make64(TARGET_PAGE_SIZE),
1103 hwaddr start, end;
1105 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1107 if (!(existing->mr->subpage)) {
1108 subpage = subpage_init(d->as, base);
1109 subsection.address_space = d->as;
1110 subsection.mr = &subpage->iomem;
1111 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1112 phys_section_add(&d->map, &subsection));
1113 } else {
1114 subpage = container_of(existing->mr, subpage_t, iomem);
1116 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1117 end = start + int128_get64(section->size) - 1;
1118 subpage_register(subpage, start, end,
1119 phys_section_add(&d->map, section));
1123 static void register_multipage(AddressSpaceDispatch *d,
1124 MemoryRegionSection *section)
1126 hwaddr start_addr = section->offset_within_address_space;
1127 uint16_t section_index = phys_section_add(&d->map, section);
1128 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1129 TARGET_PAGE_BITS));
1131 assert(num_pages);
1132 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1135 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1137 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1138 AddressSpaceDispatch *d = as->next_dispatch;
1139 MemoryRegionSection now = *section, remain = *section;
1140 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1142 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1143 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1144 - now.offset_within_address_space;
1146 now.size = int128_min(int128_make64(left), now.size);
1147 register_subpage(d, &now);
1148 } else {
1149 now.size = int128_zero();
1151 while (int128_ne(remain.size, now.size)) {
1152 remain.size = int128_sub(remain.size, now.size);
1153 remain.offset_within_address_space += int128_get64(now.size);
1154 remain.offset_within_region += int128_get64(now.size);
1155 now = remain;
1156 if (int128_lt(remain.size, page_size)) {
1157 register_subpage(d, &now);
1158 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1159 now.size = page_size;
1160 register_subpage(d, &now);
1161 } else {
1162 now.size = int128_and(now.size, int128_neg(page_size));
1163 register_multipage(d, &now);
1168 void qemu_flush_coalesced_mmio_buffer(void)
1170 if (kvm_enabled())
1171 kvm_flush_coalesced_mmio_buffer();
1174 void qemu_mutex_lock_ramlist(void)
1176 qemu_mutex_lock(&ram_list.mutex);
1179 void qemu_mutex_unlock_ramlist(void)
1181 qemu_mutex_unlock(&ram_list.mutex);
1184 #ifdef __linux__
1186 #include <sys/vfs.h>
1188 #define HUGETLBFS_MAGIC 0x958458f6
1190 static long gethugepagesize(const char *path, Error **errp)
1192 struct statfs fs;
1193 int ret;
1195 do {
1196 ret = statfs(path, &fs);
1197 } while (ret != 0 && errno == EINTR);
1199 if (ret != 0) {
1200 error_setg_errno(errp, errno, "failed to get page size of file %s",
1201 path);
1202 return 0;
1205 if (fs.f_type != HUGETLBFS_MAGIC)
1206 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1208 return fs.f_bsize;
1211 static void *file_ram_alloc(RAMBlock *block,
1212 ram_addr_t memory,
1213 const char *path,
1214 Error **errp)
1216 struct stat st;
1217 char *filename;
1218 char *sanitized_name;
1219 char *c;
1220 void * volatile area = NULL;
1221 int fd;
1222 uint64_t hpagesize;
1223 Error *local_err = NULL;
1225 hpagesize = gethugepagesize(path, &local_err);
1226 if (local_err) {
1227 error_propagate(errp, local_err);
1228 goto error;
1230 block->mr->align = hpagesize;
1232 if (memory < hpagesize) {
1233 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1234 "or larger than huge page size 0x%" PRIx64,
1235 memory, hpagesize);
1236 goto error;
1239 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1240 error_setg(errp,
1241 "host lacks kvm mmu notifiers, -mem-path unsupported");
1242 goto error;
1245 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1246 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1247 sanitized_name = g_strdup(memory_region_name(block->mr));
1248 for (c = sanitized_name; *c != '\0'; c++) {
1249 if (*c == '/') {
1250 *c = '_';
1254 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1255 sanitized_name);
1256 g_free(sanitized_name);
1258 fd = mkstemp(filename);
1259 if (fd >= 0) {
1260 unlink(filename);
1262 g_free(filename);
1263 } else {
1264 fd = open(path, O_RDWR | O_CREAT, 0644);
1267 if (fd < 0) {
1268 error_setg_errno(errp, errno,
1269 "unable to create backing store for hugepages");
1270 goto error;
1273 memory = ROUND_UP(memory, hpagesize);
1276 * ftruncate is not supported by hugetlbfs in older
1277 * hosts, so don't bother bailing out on errors.
1278 * If anything goes wrong with it under other filesystems,
1279 * mmap will fail.
1281 if (ftruncate(fd, memory)) {
1282 perror("ftruncate");
1285 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1286 if (area == MAP_FAILED) {
1287 error_setg_errno(errp, errno,
1288 "unable to map backing store for hugepages");
1289 close(fd);
1290 goto error;
1293 if (mem_prealloc) {
1294 os_mem_prealloc(fd, area, memory);
1297 block->fd = fd;
1298 return area;
1300 error:
1301 return NULL;
1303 #endif
1305 /* Called with the ramlist lock held. */
1306 static ram_addr_t find_ram_offset(ram_addr_t size)
1308 RAMBlock *block, *next_block;
1309 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1311 assert(size != 0); /* it would hand out the same offset multiple times */
1313 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1314 return 0;
1317 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1318 ram_addr_t end, next = RAM_ADDR_MAX;
1320 end = block->offset + block->max_length;
1322 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1323 if (next_block->offset >= end) {
1324 next = MIN(next, next_block->offset);
1327 if (next - end >= size && next - end < mingap) {
1328 offset = end;
1329 mingap = next - end;
1333 if (offset == RAM_ADDR_MAX) {
1334 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1335 (uint64_t)size);
1336 abort();
1339 return offset;
1342 ram_addr_t last_ram_offset(void)
1344 RAMBlock *block;
1345 ram_addr_t last = 0;
1347 rcu_read_lock();
1348 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1349 last = MAX(last, block->offset + block->max_length);
1351 rcu_read_unlock();
1352 return last;
1355 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1357 int ret;
1359 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1360 if (!machine_dump_guest_core(current_machine)) {
1361 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1362 if (ret) {
1363 perror("qemu_madvise");
1364 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1365 "but dump_guest_core=off specified\n");
1370 /* Called within an RCU critical section, or while the ramlist lock
1371 * is held.
1373 static RAMBlock *find_ram_block(ram_addr_t addr)
1375 RAMBlock *block;
1377 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1378 if (block->offset == addr) {
1379 return block;
1383 return NULL;
1386 const char *qemu_ram_get_idstr(RAMBlock *rb)
1388 return rb->idstr;
1391 /* Called with iothread lock held. */
1392 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1394 RAMBlock *new_block, *block;
1396 rcu_read_lock();
1397 new_block = find_ram_block(addr);
1398 assert(new_block);
1399 assert(!new_block->idstr[0]);
1401 if (dev) {
1402 char *id = qdev_get_dev_path(dev);
1403 if (id) {
1404 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1405 g_free(id);
1408 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1410 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1411 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1412 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1413 new_block->idstr);
1414 abort();
1417 rcu_read_unlock();
1420 /* Called with iothread lock held. */
1421 void qemu_ram_unset_idstr(ram_addr_t addr)
1423 RAMBlock *block;
1425 /* FIXME: arch_init.c assumes that this is not called throughout
1426 * migration. Ignore the problem since hot-unplug during migration
1427 * does not work anyway.
1430 rcu_read_lock();
1431 block = find_ram_block(addr);
1432 if (block) {
1433 memset(block->idstr, 0, sizeof(block->idstr));
1435 rcu_read_unlock();
1438 static int memory_try_enable_merging(void *addr, size_t len)
1440 if (!machine_mem_merge(current_machine)) {
1441 /* disabled by the user */
1442 return 0;
1445 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1448 /* Only legal before guest might have detected the memory size: e.g. on
1449 * incoming migration, or right after reset.
1451 * As the memory core doesn't know how memory is accessed, it is up to the
1452 * resize callback to update device state and/or add assertions to detect
1453 * misuse, if necessary.
1455 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1457 RAMBlock *block = find_ram_block(base);
1459 assert(block);
1461 newsize = HOST_PAGE_ALIGN(newsize);
1463 if (block->used_length == newsize) {
1464 return 0;
1467 if (!(block->flags & RAM_RESIZEABLE)) {
1468 error_setg_errno(errp, EINVAL,
1469 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1470 " in != 0x" RAM_ADDR_FMT, block->idstr,
1471 newsize, block->used_length);
1472 return -EINVAL;
1475 if (block->max_length < newsize) {
1476 error_setg_errno(errp, EINVAL,
1477 "Length too large: %s: 0x" RAM_ADDR_FMT
1478 " > 0x" RAM_ADDR_FMT, block->idstr,
1479 newsize, block->max_length);
1480 return -EINVAL;
1483 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1484 block->used_length = newsize;
1485 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1486 DIRTY_CLIENTS_ALL);
1487 memory_region_set_size(block->mr, newsize);
1488 if (block->resized) {
1489 block->resized(block->idstr, newsize, block->host);
1491 return 0;
1494 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1496 RAMBlock *block;
1497 RAMBlock *last_block = NULL;
1498 ram_addr_t old_ram_size, new_ram_size;
1500 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1502 qemu_mutex_lock_ramlist();
1503 new_block->offset = find_ram_offset(new_block->max_length);
1505 if (!new_block->host) {
1506 if (xen_enabled()) {
1507 xen_ram_alloc(new_block->offset, new_block->max_length,
1508 new_block->mr);
1509 } else {
1510 new_block->host = phys_mem_alloc(new_block->max_length,
1511 &new_block->mr->align);
1512 if (!new_block->host) {
1513 error_setg_errno(errp, errno,
1514 "cannot set up guest memory '%s'",
1515 memory_region_name(new_block->mr));
1516 qemu_mutex_unlock_ramlist();
1517 return -1;
1519 memory_try_enable_merging(new_block->host, new_block->max_length);
1523 new_ram_size = MAX(old_ram_size,
1524 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1525 if (new_ram_size > old_ram_size) {
1526 migration_bitmap_extend(old_ram_size, new_ram_size);
1528 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1529 * QLIST (which has an RCU-friendly variant) does not have insertion at
1530 * tail, so save the last element in last_block.
1532 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1533 last_block = block;
1534 if (block->max_length < new_block->max_length) {
1535 break;
1538 if (block) {
1539 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1540 } else if (last_block) {
1541 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1542 } else { /* list is empty */
1543 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1545 ram_list.mru_block = NULL;
1547 /* Write list before version */
1548 smp_wmb();
1549 ram_list.version++;
1550 qemu_mutex_unlock_ramlist();
1552 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1554 if (new_ram_size > old_ram_size) {
1555 int i;
1557 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1558 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1559 ram_list.dirty_memory[i] =
1560 bitmap_zero_extend(ram_list.dirty_memory[i],
1561 old_ram_size, new_ram_size);
1564 cpu_physical_memory_set_dirty_range(new_block->offset,
1565 new_block->used_length,
1566 DIRTY_CLIENTS_ALL);
1568 if (new_block->host) {
1569 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1570 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1571 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1572 if (kvm_enabled()) {
1573 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1577 return new_block->offset;
1580 #ifdef __linux__
1581 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1582 bool share, const char *mem_path,
1583 Error **errp)
1585 RAMBlock *new_block;
1586 ram_addr_t addr;
1587 Error *local_err = NULL;
1589 if (xen_enabled()) {
1590 error_setg(errp, "-mem-path not supported with Xen");
1591 return -1;
1594 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1596 * file_ram_alloc() needs to allocate just like
1597 * phys_mem_alloc, but we haven't bothered to provide
1598 * a hook there.
1600 error_setg(errp,
1601 "-mem-path not supported with this accelerator");
1602 return -1;
1605 size = HOST_PAGE_ALIGN(size);
1606 new_block = g_malloc0(sizeof(*new_block));
1607 new_block->mr = mr;
1608 new_block->used_length = size;
1609 new_block->max_length = size;
1610 new_block->flags = share ? RAM_SHARED : 0;
1611 new_block->flags |= RAM_FILE;
1612 new_block->host = file_ram_alloc(new_block, size,
1613 mem_path, errp);
1614 if (!new_block->host) {
1615 g_free(new_block);
1616 return -1;
1619 addr = ram_block_add(new_block, &local_err);
1620 if (local_err) {
1621 g_free(new_block);
1622 error_propagate(errp, local_err);
1623 return -1;
1625 return addr;
1627 #endif
1629 static
1630 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1631 void (*resized)(const char*,
1632 uint64_t length,
1633 void *host),
1634 void *host, bool resizeable,
1635 MemoryRegion *mr, Error **errp)
1637 RAMBlock *new_block;
1638 ram_addr_t addr;
1639 Error *local_err = NULL;
1641 size = HOST_PAGE_ALIGN(size);
1642 max_size = HOST_PAGE_ALIGN(max_size);
1643 new_block = g_malloc0(sizeof(*new_block));
1644 new_block->mr = mr;
1645 new_block->resized = resized;
1646 new_block->used_length = size;
1647 new_block->max_length = max_size;
1648 assert(max_size >= size);
1649 new_block->fd = -1;
1650 new_block->host = host;
1651 if (host) {
1652 new_block->flags |= RAM_PREALLOC;
1654 if (resizeable) {
1655 new_block->flags |= RAM_RESIZEABLE;
1657 addr = ram_block_add(new_block, &local_err);
1658 if (local_err) {
1659 g_free(new_block);
1660 error_propagate(errp, local_err);
1661 return -1;
1663 return addr;
1666 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1667 MemoryRegion *mr, Error **errp)
1669 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1672 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1674 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1677 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1678 void (*resized)(const char*,
1679 uint64_t length,
1680 void *host),
1681 MemoryRegion *mr, Error **errp)
1683 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
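/* The wrappers above differ only in what they pass to qemu_ram_alloc_internal():
 * qemu_ram_alloc_from_ptr() uses a caller-provided buffer, qemu_ram_alloc()
 * lets QEMU allocate anonymous memory, and qemu_ram_alloc_resizeable()
 * additionally sets RAM_RESIZEABLE so qemu_ram_resize() may later adjust
 * used_length up to maxsz.
 */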
1686 void qemu_ram_free_from_ptr(ram_addr_t addr)
1688 RAMBlock *block;
1690 qemu_mutex_lock_ramlist();
1691 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1692 if (addr == block->offset) {
1693 QLIST_REMOVE_RCU(block, next);
1694 ram_list.mru_block = NULL;
1695 /* Write list before version */
1696 smp_wmb();
1697 ram_list.version++;
1698 g_free_rcu(block, rcu);
1699 break;
1702 qemu_mutex_unlock_ramlist();
1705 static void reclaim_ramblock(RAMBlock *block)
1707 if (block->flags & RAM_PREALLOC) {
1709 } else if (xen_enabled()) {
1710 xen_invalidate_map_cache_entry(block->host);
1711 #ifndef _WIN32
1712 } else if (block->fd >= 0) {
1713 if (block->flags & RAM_FILE) {
1714 qemu_ram_munmap(block->host, block->max_length);
1715 } else {
1716 munmap(block->host, block->max_length);
1718 close(block->fd);
1719 #endif
1720 } else {
1721 qemu_anon_ram_free(block->host, block->max_length);
1723 g_free(block);
1726 void qemu_ram_free(ram_addr_t addr)
1728 RAMBlock *block;
1730 qemu_mutex_lock_ramlist();
1731 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1732 if (addr == block->offset) {
1733 QLIST_REMOVE_RCU(block, next);
1734 ram_list.mru_block = NULL;
1735 /* Write list before version */
1736 smp_wmb();
1737 ram_list.version++;
1738 call_rcu(block, reclaim_ramblock, rcu);
1739 break;
1742 qemu_mutex_unlock_ramlist();
1745 #ifndef _WIN32
1746 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1748 RAMBlock *block;
1749 ram_addr_t offset;
1750 int flags;
1751 void *area, *vaddr;
1753 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1754 offset = addr - block->offset;
1755 if (offset < block->max_length) {
1756 vaddr = ramblock_ptr(block, offset);
1757 if (block->flags & RAM_PREALLOC) {
1759 } else if (xen_enabled()) {
1760 abort();
1761 } else {
1762 flags = MAP_FIXED;
1763 if (block->fd >= 0) {
1764 flags |= (block->flags & RAM_SHARED ?
1765 MAP_SHARED : MAP_PRIVATE);
1766 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1767 flags, block->fd, offset);
1768 } else {
1770 * Remap needs to match alloc. Accelerators that
1771 * set phys_mem_alloc never remap. If they did,
1772 * we'd need a remap hook here.
1774 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1776 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1777 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1778 flags, -1, 0);
1780 if (area != vaddr) {
1781 fprintf(stderr, "Could not remap addr: "
1782 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1783 length, addr);
1784 exit(1);
1786 memory_try_enable_merging(vaddr, length);
1787 qemu_ram_setup_dump(vaddr, length);
1792 #endif /* !_WIN32 */
1794 int qemu_get_ram_fd(ram_addr_t addr)
1796 RAMBlock *block;
1797 int fd;
1799 rcu_read_lock();
1800 block = qemu_get_ram_block(addr);
1801 fd = block->fd;
1802 rcu_read_unlock();
1803 return fd;
1806 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1808 RAMBlock *block;
1809 void *ptr;
1811 rcu_read_lock();
1812 block = qemu_get_ram_block(addr);
1813 ptr = ramblock_ptr(block, 0);
1814 rcu_read_unlock();
1815 return ptr;
1818 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1819 * This should not be used for general purpose DMA. Use address_space_map
1820 * or address_space_rw instead. For local memory (e.g. video ram) that the
1821 * device owns, use memory_region_get_ram_ptr.
1823 * By the time this function returns, the returned pointer is not protected
1824 * by RCU anymore. If the caller is not within an RCU critical section and
1825 * does not hold the iothread lock, it must have other means of protecting the
1826 * pointer, such as a reference to the region that includes the incoming
1827 * ram_addr_t.
1829 void *qemu_get_ram_ptr(ram_addr_t addr)
1831 RAMBlock *block;
1832 void *ptr;
1834 rcu_read_lock();
1835 block = qemu_get_ram_block(addr);
1837 if (xen_enabled() && block->host == NULL) {
1838 /* We need to check if the requested address is in RAM
1839 * because we don't want to map the entire memory in QEMU.
1840 * In that case just map until the end of the page.
1842 if (block->offset == 0) {
1843 ptr = xen_map_cache(addr, 0, 0);
1844 goto unlock;
1847 block->host = xen_map_cache(block->offset, block->max_length, 1);
1849 ptr = ramblock_ptr(block, addr - block->offset);
1851 unlock:
1852 rcu_read_unlock();
1853 return ptr;
1856 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1857 * but takes a size argument.
1859 * By the time this function returns, the returned pointer is not protected
1860 * by RCU anymore. If the caller is not within an RCU critical section and
1861 * does not hold the iothread lock, it must have other means of protecting the
1862 * pointer, such as a reference to the region that includes the incoming
1863 * ram_addr_t.
1865 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1867 void *ptr;
1868 if (*size == 0) {
1869 return NULL;
1871 if (xen_enabled()) {
1872 return xen_map_cache(addr, *size, 1);
1873 } else {
1874 RAMBlock *block;
1875 rcu_read_lock();
1876 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1877 if (addr - block->offset < block->max_length) {
1878 if (addr - block->offset + *size > block->max_length)
1879 *size = block->max_length - addr + block->offset;
1880 ptr = ramblock_ptr(block, addr - block->offset);
1881 rcu_read_unlock();
1882 return ptr;
1886 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1887 abort();
1892 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1893 * in that RAMBlock.
1895 * ptr: Host pointer to look up
1896 * round_offset: If true round the result offset down to a page boundary
1897 * *ram_addr: set to result ram_addr
1898 * *offset: set to result offset within the RAMBlock
1900 * Returns: RAMBlock (or NULL if not found)
1902 * By the time this function returns, the returned pointer is not protected
1903 * by RCU anymore. If the caller is not within an RCU critical section and
1904 * does not hold the iothread lock, it must have other means of protecting the
1905 * pointer, such as a reference to the region that includes the incoming
1906 * ram_addr_t.
1908 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1909 ram_addr_t *ram_addr,
1910 ram_addr_t *offset)
1912 RAMBlock *block;
1913 uint8_t *host = ptr;
1915 if (xen_enabled()) {
1916 rcu_read_lock();
1917 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1918 block = qemu_get_ram_block(*ram_addr);
1919 if (block) {
1920 *offset = (host - block->host);
1922 rcu_read_unlock();
1923 return block;
1926 rcu_read_lock();
1927 block = atomic_rcu_read(&ram_list.mru_block);
1928 if (block && block->host && host - block->host < block->max_length) {
1929 goto found;
1932 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1933 /* This case happens when the block is not mapped. */
1934 if (block->host == NULL) {
1935 continue;
1937 if (host - block->host < block->max_length) {
1938 goto found;
1942 rcu_read_unlock();
1943 return NULL;
1945 found:
1946 *offset = (host - block->host);
1947 if (round_offset) {
1948 *offset &= TARGET_PAGE_MASK;
1950 *ram_addr = block->offset + *offset;
1951 rcu_read_unlock();
1952 return block;
1956 * Finds the named RAMBlock
1958 * name: The name of RAMBlock to find
1960 * Returns: RAMBlock (or NULL if not found)
1962 RAMBlock *qemu_ram_block_by_name(const char *name)
1964 RAMBlock *block;
1966 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1967 if (!strcmp(name, block->idstr)) {
1968 return block;
1972 return NULL;
1975 /* Some of the softmmu routines need to translate from a host pointer
1976 (typically a TLB entry) back to a ram offset. */
1977 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1979 RAMBlock *block;
1980 ram_addr_t offset; /* Not used */
1982 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
1984 if (!block) {
1985 return NULL;
1988 return block->mr;
1991 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1992 uint64_t val, unsigned size)
1994 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1995 tb_invalidate_phys_page_fast(ram_addr, size);
1997 switch (size) {
1998 case 1:
1999 stb_p(qemu_get_ram_ptr(ram_addr), val);
2000 break;
2001 case 2:
2002 stw_p(qemu_get_ram_ptr(ram_addr), val);
2003 break;
2004 case 4:
2005 stl_p(qemu_get_ram_ptr(ram_addr), val);
2006 break;
2007 default:
2008 abort();
2010 /* Set both VGA and migration bits for simplicity and to remove
2011 * the notdirty callback faster.
2013 cpu_physical_memory_set_dirty_range(ram_addr, size,
2014 DIRTY_CLIENTS_NOCODE);
2015 /* we remove the notdirty callback only if the code has been
2016 flushed */
2017 if (!cpu_physical_memory_is_clean(ram_addr)) {
2018 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
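/* Only write accesses are routed through this region; reads of a clean page
 * go straight to RAM via the normal TLB path.
 */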
2022 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2023 unsigned size, bool is_write)
2025 return is_write;
2028 static const MemoryRegionOps notdirty_mem_ops = {
2029 .write = notdirty_mem_write,
2030 .valid.accepts = notdirty_mem_accepts,
2031 .endianness = DEVICE_NATIVE_ENDIAN,
2034 /* Generate a debug exception if a watchpoint has been hit. */
2035 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2037 CPUState *cpu = current_cpu;
2038 CPUArchState *env = cpu->env_ptr;
2039 target_ulong pc, cs_base;
2040 target_ulong vaddr;
2041 CPUWatchpoint *wp;
2042 int cpu_flags;
2044 if (cpu->watchpoint_hit) {
2045 /* We re-entered the check after replacing the TB. Now raise
2046 * the debug interrupt so that it will trigger after the
2047 * current instruction. */
2048 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2049 return;
2051 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2052 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2053 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2054 && (wp->flags & flags)) {
2055 if (flags == BP_MEM_READ) {
2056 wp->flags |= BP_WATCHPOINT_HIT_READ;
2057 } else {
2058 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2060 wp->hitaddr = vaddr;
2061 wp->hitattrs = attrs;
2062 if (!cpu->watchpoint_hit) {
2063 cpu->watchpoint_hit = wp;
2064 tb_check_watchpoint(cpu);
2065 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2066 cpu->exception_index = EXCP_DEBUG;
2067 cpu_loop_exit(cpu);
2068 } else {
2069 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2070 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2071 cpu_resume_from_signal(cpu, NULL);
2074 } else {
2075 wp->flags &= ~BP_WATCHPOINT_HIT;
2080 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2081 so these check for a hit then pass through to the normal out-of-line
2082 phys routines. */
2083 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2084 unsigned size, MemTxAttrs attrs)
2086 MemTxResult res;
2087 uint64_t data;
2089 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2090 switch (size) {
2091 case 1:
2092 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2093 break;
2094 case 2:
2095 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2096 break;
2097 case 4:
2098 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2099 break;
2100 default: abort();
2102 *pdata = data;
2103 return res;
2106 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2107 uint64_t val, unsigned size,
2108 MemTxAttrs attrs)
2110 MemTxResult res;
2112 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2113 switch (size) {
2114 case 1:
2115 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2116 break;
2117 case 2:
2118 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2119 break;
2120 case 4:
2121 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2122 break;
2123 default: abort();
2125 return res;
2128 static const MemoryRegionOps watch_mem_ops = {
2129 .read_with_attrs = watch_mem_read,
2130 .write_with_attrs = watch_mem_write,
2131 .endianness = DEVICE_NATIVE_ENDIAN,
2134 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2135 unsigned len, MemTxAttrs attrs)
2137 subpage_t *subpage = opaque;
2138 uint8_t buf[8];
2139 MemTxResult res;
2141 #if defined(DEBUG_SUBPAGE)
2142 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2143 subpage, len, addr);
2144 #endif
2145 res = address_space_read(subpage->as, addr + subpage->base,
2146 attrs, buf, len);
2147 if (res) {
2148 return res;
2150 switch (len) {
2151 case 1:
2152 *data = ldub_p(buf);
2153 return MEMTX_OK;
2154 case 2:
2155 *data = lduw_p(buf);
2156 return MEMTX_OK;
2157 case 4:
2158 *data = ldl_p(buf);
2159 return MEMTX_OK;
2160 case 8:
2161 *data = ldq_p(buf);
2162 return MEMTX_OK;
2163 default:
2164 abort();
2168 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2169 uint64_t value, unsigned len, MemTxAttrs attrs)
2171 subpage_t *subpage = opaque;
2172 uint8_t buf[8];
2174 #if defined(DEBUG_SUBPAGE)
2175 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2176 " value %"PRIx64"\n",
2177 __func__, subpage, len, addr, value);
2178 #endif
2179 switch (len) {
2180 case 1:
2181 stb_p(buf, value);
2182 break;
2183 case 2:
2184 stw_p(buf, value);
2185 break;
2186 case 4:
2187 stl_p(buf, value);
2188 break;
2189 case 8:
2190 stq_p(buf, value);
2191 break;
2192 default:
2193 abort();
2195 return address_space_write(subpage->as, addr + subpage->base,
2196 attrs, buf, len);
2199 static bool subpage_accepts(void *opaque, hwaddr addr,
2200 unsigned len, bool is_write)
2202 subpage_t *subpage = opaque;
2203 #if defined(DEBUG_SUBPAGE)
2204 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2205 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2206 #endif
2208 return address_space_access_valid(subpage->as, addr + subpage->base,
2209 len, is_write);
2212 static const MemoryRegionOps subpage_ops = {
2213 .read_with_attrs = subpage_read,
2214 .write_with_attrs = subpage_write,
2215 .impl.min_access_size = 1,
2216 .impl.max_access_size = 8,
2217 .valid.min_access_size = 1,
2218 .valid.max_access_size = 8,
2219 .valid.accepts = subpage_accepts,
2220 .endianness = DEVICE_NATIVE_ENDIAN,
2223 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2224 uint16_t section)
2226 int idx, eidx;
2228 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2229 return -1;
2230 idx = SUBPAGE_IDX(start);
2231 eidx = SUBPAGE_IDX(end);
2232 #if defined(DEBUG_SUBPAGE)
2233 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2234 __func__, mmio, start, end, idx, eidx, section);
2235 #endif
2236 for (; idx <= eidx; idx++) {
2237 mmio->sub_section[idx] = section;
2240 return 0;
2243 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2245 subpage_t *mmio;
2247 mmio = g_malloc0(sizeof(subpage_t));
2249 mmio->as = as;
2250 mmio->base = base;
2251 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2252 NULL, TARGET_PAGE_SIZE);
2253 mmio->iomem.subpage = true;
2254 #if defined(DEBUG_SUBPAGE)
2255 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2256 mmio, base, TARGET_PAGE_SIZE);
2257 #endif
2258 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2260 return mmio;
2263 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2264 MemoryRegion *mr)
2266 assert(as);
2267 MemoryRegionSection section = {
2268 .address_space = as,
2269 .mr = mr,
2270 .offset_within_address_space = 0,
2271 .offset_within_region = 0,
2272 .size = int128_2_64(),
2275 return phys_section_add(map, &section);
2278 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2280 CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2281 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2282 MemoryRegionSection *sections = d->map.sections;
2284 return sections[index & ~TARGET_PAGE_MASK].mr;
2287 static void io_mem_init(void)
2289 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2290 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2291 NULL, UINT64_MAX);
2292 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2293 NULL, UINT64_MAX);
2294 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2295 NULL, UINT64_MAX);
2298 static void mem_begin(MemoryListener *listener)
2300 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2301 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2302 uint16_t n;
2304 n = dummy_section(&d->map, as, &io_mem_unassigned);
2305 assert(n == PHYS_SECTION_UNASSIGNED);
2306 n = dummy_section(&d->map, as, &io_mem_notdirty);
2307 assert(n == PHYS_SECTION_NOTDIRTY);
2308 n = dummy_section(&d->map, as, &io_mem_rom);
2309 assert(n == PHYS_SECTION_ROM);
2310 n = dummy_section(&d->map, as, &io_mem_watch);
2311 assert(n == PHYS_SECTION_WATCH);
2313 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2314 d->as = as;
2315 as->next_dispatch = d;
2318 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2320 phys_sections_free(&d->map);
2321 g_free(d);
2324 static void mem_commit(MemoryListener *listener)
2326 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2327 AddressSpaceDispatch *cur = as->dispatch;
2328 AddressSpaceDispatch *next = as->next_dispatch;
2330 phys_page_compact_all(next, next->map.nodes_nb);
2332 atomic_rcu_set(&as->dispatch, next);
2333 if (cur) {
2334 call_rcu(cur, address_space_dispatch_free, rcu);
2338 static void tcg_commit(MemoryListener *listener)
2340 CPUAddressSpace *cpuas;
2341 AddressSpaceDispatch *d;
2343 /* since each CPU stores ram addresses in its TLB cache, we must
2344 reset the modified entries */
2345 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2346 cpu_reloading_memory_map();
2347 /* The CPU and TLB are protected by the iothread lock.
2348 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2349 * may have split the RCU critical section.
2351 d = atomic_rcu_read(&cpuas->as->dispatch);
2352 cpuas->memory_dispatch = d;
2353 tlb_flush(cpuas->cpu, 1);
2356 void address_space_init_dispatch(AddressSpace *as)
2358 as->dispatch = NULL;
2359 as->dispatch_listener = (MemoryListener) {
2360 .begin = mem_begin,
2361 .commit = mem_commit,
2362 .region_add = mem_add,
2363 .region_nop = mem_add,
2364 .priority = 0,
2366 memory_listener_register(&as->dispatch_listener, as);
2369 void address_space_unregister(AddressSpace *as)
2371 memory_listener_unregister(&as->dispatch_listener);
2374 void address_space_destroy_dispatch(AddressSpace *as)
2376 AddressSpaceDispatch *d = as->dispatch;
2378 atomic_rcu_set(&as->dispatch, NULL);
2379 if (d) {
2380 call_rcu(d, address_space_dispatch_free, rcu);
2384 static void memory_map_init(void)
2386 system_memory = g_malloc(sizeof(*system_memory));
2388 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2389 address_space_init(&address_space_memory, system_memory, "memory");
2391 system_io = g_malloc(sizeof(*system_io));
2392 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2393 65536);
2394 address_space_init(&address_space_io, system_io, "I/O");
2397 MemoryRegion *get_system_memory(void)
2399 return system_memory;
2402 MemoryRegion *get_system_io(void)
2404 return system_io;
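/* Illustrative sketch (editor's addition, not part of the original source):
 * board code typically grabs the root region returned by get_system_memory()
 * and maps its own RAM into it.  The helpers used below come from the memory
 * API headers rather than this file, and the region name, size and use of
 * &error_abort are assumptions made for the example.
 *
 *     static MemoryRegion example_ram;
 *
 *     static void example_board_ram_init(void)
 *     {
 *         memory_region_init_ram(&example_ram, NULL, "example.ram",
 *                                64 * 1024 * 1024, &error_abort);
 *         vmstate_register_ram_global(&example_ram);
 *         memory_region_add_subregion(get_system_memory(), 0, &example_ram);
 *     }
 */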
2407 #endif /* !defined(CONFIG_USER_ONLY) */
2409 /* physical memory access (slow version, mainly for debug) */
2410 #if defined(CONFIG_USER_ONLY)
2411 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2412 uint8_t *buf, int len, int is_write)
2414 int l, flags;
2415 target_ulong page;
2416 void * p;
2418 while (len > 0) {
2419 page = addr & TARGET_PAGE_MASK;
2420 l = (page + TARGET_PAGE_SIZE) - addr;
2421 if (l > len)
2422 l = len;
2423 flags = page_get_flags(page);
2424 if (!(flags & PAGE_VALID))
2425 return -1;
2426 if (is_write) {
2427 if (!(flags & PAGE_WRITE))
2428 return -1;
2429 /* XXX: this code should not depend on lock_user */
2430 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2431 return -1;
2432 memcpy(p, buf, l);
2433 unlock_user(p, addr, l);
2434 } else {
2435 if (!(flags & PAGE_READ))
2436 return -1;
2437 /* XXX: this code should not depend on lock_user */
2438 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2439 return -1;
2440 memcpy(buf, p, l);
2441 unlock_user(p, addr, 0);
2443 len -= l;
2444 buf += l;
2445 addr += l;
2447 return 0;
2450 #else
2452 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2453 hwaddr length)
2455 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2456 /* No early return if dirty_log_mask is or becomes 0, because
2457 * cpu_physical_memory_set_dirty_range will still call
2458 * xen_modified_memory.
2460 if (dirty_log_mask) {
2461 dirty_log_mask =
2462 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2464 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2465 tb_invalidate_phys_range(addr, addr + length);
2466 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2468 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2471 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2473 unsigned access_size_max = mr->ops->valid.max_access_size;
2475 /* Regions are assumed to support 1-4 byte accesses unless
2476 otherwise specified. */
2477 if (access_size_max == 0) {
2478 access_size_max = 4;
2481 /* Bound the maximum access by the alignment of the address. */
2482 if (!mr->ops->impl.unaligned) {
2483 unsigned align_size_max = addr & -addr;
2484 if (align_size_max != 0 && align_size_max < access_size_max) {
2485 access_size_max = align_size_max;
2489 /* Don't attempt accesses larger than the maximum. */
2490 if (l > access_size_max) {
2491 l = access_size_max;
2493 l = pow2floor(l);
2495 return l;
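/* Worked example (editor's note): for an MMIO region whose
 * valid.max_access_size is 4, a 16-byte request at an address that is only
 * 2-byte aligned is first clamped to 2 by the alignment test
 * (addr & -addr == 2), stays 2 after pow2floor(), and the caller in
 * address_space_rw() therefore issues a 2-byte dispatch and loops for the
 * remainder. */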
2498 static bool prepare_mmio_access(MemoryRegion *mr)
2500 bool unlocked = !qemu_mutex_iothread_locked();
2501 bool release_lock = false;
2503 if (unlocked && mr->global_locking) {
2504 qemu_mutex_lock_iothread();
2505 unlocked = false;
2506 release_lock = true;
2508 if (mr->flush_coalesced_mmio) {
2509 if (unlocked) {
2510 qemu_mutex_lock_iothread();
2512 qemu_flush_coalesced_mmio_buffer();
2513 if (unlocked) {
2514 qemu_mutex_unlock_iothread();
2518 return release_lock;
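/* Usage pattern (editor's note): callers OR the return value into a local
 * release_lock flag before dispatching to the region, and drop the iothread
 * lock once the current chunk of the access is done:
 *
 *     release_lock |= prepare_mmio_access(mr);
 *     result |= memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
 *     ...
 *     if (release_lock) {
 *         qemu_mutex_unlock_iothread();
 *         release_lock = false;
 *     }
 *
 * address_space_rw() below follows exactly this shape. */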
2521 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2522 uint8_t *buf, int len, bool is_write)
2524 hwaddr l;
2525 uint8_t *ptr;
2526 uint64_t val;
2527 hwaddr addr1;
2528 MemoryRegion *mr;
2529 MemTxResult result = MEMTX_OK;
2530 bool release_lock = false;
2532 rcu_read_lock();
2533 while (len > 0) {
2534 l = len;
2535 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2537 if (is_write) {
2538 if (!memory_access_is_direct(mr, is_write)) {
2539 release_lock |= prepare_mmio_access(mr);
2540 l = memory_access_size(mr, l, addr1);
2541 /* XXX: could force current_cpu to NULL to avoid
2542 potential bugs */
2543 switch (l) {
2544 case 8:
2545 /* 64 bit write access */
2546 val = ldq_p(buf);
2547 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2548 attrs);
2549 break;
2550 case 4:
2551 /* 32 bit write access */
2552 val = ldl_p(buf);
2553 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2554 attrs);
2555 break;
2556 case 2:
2557 /* 16 bit write access */
2558 val = lduw_p(buf);
2559 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2560 attrs);
2561 break;
2562 case 1:
2563 /* 8 bit write access */
2564 val = ldub_p(buf);
2565 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2566 attrs);
2567 break;
2568 default:
2569 abort();
2571 } else {
2572 addr1 += memory_region_get_ram_addr(mr);
2573 /* RAM case */
2574 ptr = qemu_get_ram_ptr(addr1);
2575 memcpy(ptr, buf, l);
2576 invalidate_and_set_dirty(mr, addr1, l);
2578 } else {
2579 if (!memory_access_is_direct(mr, is_write)) {
2580 /* I/O case */
2581 release_lock |= prepare_mmio_access(mr);
2582 l = memory_access_size(mr, l, addr1);
2583 switch (l) {
2584 case 8:
2585 /* 64 bit read access */
2586 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2587 attrs);
2588 stq_p(buf, val);
2589 break;
2590 case 4:
2591 /* 32 bit read access */
2592 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2593 attrs);
2594 stl_p(buf, val);
2595 break;
2596 case 2:
2597 /* 16 bit read access */
2598 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2599 attrs);
2600 stw_p(buf, val);
2601 break;
2602 case 1:
2603 /* 8 bit read access */
2604 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2605 attrs);
2606 stb_p(buf, val);
2607 break;
2608 default:
2609 abort();
2611 } else {
2612 /* RAM case */
2613 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2614 memcpy(buf, ptr, l);
2618 if (release_lock) {
2619 qemu_mutex_unlock_iothread();
2620 release_lock = false;
2623 len -= l;
2624 buf += l;
2625 addr += l;
2627 rcu_read_unlock();
2629 return result;
2632 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2633 const uint8_t *buf, int len)
2635 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2638 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2639 uint8_t *buf, int len)
2641 return address_space_rw(as, addr, attrs, buf, len, false);
2645 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2646 int len, int is_write)
2648 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2649 buf, len, is_write);
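/* Illustrative sketch (editor's addition): a device model pulling a small
 * descriptor out of guest memory and checking the transaction result instead
 * of ignoring it.  "desc_addr" is a hypothetical guest physical address.
 *
 *     uint8_t desc[16];
 *     MemTxResult r = address_space_read(&address_space_memory, desc_addr,
 *                                        MEMTXATTRS_UNSPECIFIED, desc,
 *                                        sizeof(desc));
 *     if (r != MEMTX_OK) {
 *         (report a bus error to the guest instead of using the data)
 *     }
 */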
2652 enum write_rom_type {
2653 WRITE_DATA,
2654 FLUSH_CACHE,
2657 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2658 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2660 hwaddr l;
2661 uint8_t *ptr;
2662 hwaddr addr1;
2663 MemoryRegion *mr;
2665 rcu_read_lock();
2666 while (len > 0) {
2667 l = len;
2668 mr = address_space_translate(as, addr, &addr1, &l, true);
2670 if (!(memory_region_is_ram(mr) ||
2671 memory_region_is_romd(mr))) {
2672 l = memory_access_size(mr, l, addr1);
2673 } else {
2674 addr1 += memory_region_get_ram_addr(mr);
2675 /* ROM/RAM case */
2676 ptr = qemu_get_ram_ptr(addr1);
2677 switch (type) {
2678 case WRITE_DATA:
2679 memcpy(ptr, buf, l);
2680 invalidate_and_set_dirty(mr, addr1, l);
2681 break;
2682 case FLUSH_CACHE:
2683 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2684 break;
2687 len -= l;
2688 buf += l;
2689 addr += l;
2691 rcu_read_unlock();
2694 /* used for ROM loading; can write in RAM and ROM */
2695 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2696 const uint8_t *buf, int len)
2698 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2701 void cpu_flush_icache_range(hwaddr start, int len)
2704 * This function should do the same thing as an icache flush that was
2705 * triggered from within the guest. For TCG we are always cache coherent,
2706 * so there is no need to flush anything. For KVM / Xen we need to flush
2707 * the host's instruction cache at least.
2709 if (tcg_enabled()) {
2710 return;
2713 cpu_physical_memory_write_rom_internal(&address_space_memory,
2714 start, NULL, len, FLUSH_CACHE);
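/* Illustrative sketch (editor's addition): firmware loaders use
 * cpu_physical_memory_write_rom() so the data lands even in regions that
 * reject normal writes (ROM devices), then flush the icache when the bytes
 * are code.  "FIRMWARE_BASE", "blob" and "blob_len" are made-up names.
 *
 *     cpu_physical_memory_write_rom(&address_space_memory, FIRMWARE_BASE,
 *                                   blob, blob_len);
 *     cpu_flush_icache_range(FIRMWARE_BASE, blob_len);
 */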
2717 typedef struct {
2718 MemoryRegion *mr;
2719 void *buffer;
2720 hwaddr addr;
2721 hwaddr len;
2722 bool in_use;
2723 } BounceBuffer;
2725 static BounceBuffer bounce;
2727 typedef struct MapClient {
2728 QEMUBH *bh;
2729 QLIST_ENTRY(MapClient) link;
2730 } MapClient;
2732 QemuMutex map_client_list_lock;
2733 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2734 = QLIST_HEAD_INITIALIZER(map_client_list);
2736 static void cpu_unregister_map_client_do(MapClient *client)
2738 QLIST_REMOVE(client, link);
2739 g_free(client);
2742 static void cpu_notify_map_clients_locked(void)
2744 MapClient *client;
2746 while (!QLIST_EMPTY(&map_client_list)) {
2747 client = QLIST_FIRST(&map_client_list);
2748 qemu_bh_schedule(client->bh);
2749 cpu_unregister_map_client_do(client);
2753 void cpu_register_map_client(QEMUBH *bh)
2755 MapClient *client = g_malloc(sizeof(*client));
2757 qemu_mutex_lock(&map_client_list_lock);
2758 client->bh = bh;
2759 QLIST_INSERT_HEAD(&map_client_list, client, link);
2760 if (!atomic_read(&bounce.in_use)) {
2761 cpu_notify_map_clients_locked();
2763 qemu_mutex_unlock(&map_client_list_lock);
2766 void cpu_exec_init_all(void)
2768 qemu_mutex_init(&ram_list.mutex);
2769 io_mem_init();
2770 memory_map_init();
2771 qemu_mutex_init(&map_client_list_lock);
2774 void cpu_unregister_map_client(QEMUBH *bh)
2776 MapClient *client;
2778 qemu_mutex_lock(&map_client_list_lock);
2779 QLIST_FOREACH(client, &map_client_list, link) {
2780 if (client->bh == bh) {
2781 cpu_unregister_map_client_do(client);
2782 break;
2785 qemu_mutex_unlock(&map_client_list_lock);
2788 static void cpu_notify_map_clients(void)
2790 qemu_mutex_lock(&map_client_list_lock);
2791 cpu_notify_map_clients_locked();
2792 qemu_mutex_unlock(&map_client_list_lock);
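/* Illustrative sketch (editor's addition): when address_space_map() returns
 * NULL because the single bounce buffer is busy, a caller can register a
 * bottom half to be scheduled once the buffer is released and retry from
 * there.  The callback and "my_state" are hypothetical.
 *
 *     static void retry_dma_cb(void *opaque)
 *     {
 *         (runs in the main loop once a mapping may succeed again;
 *          call address_space_map() again from here)
 *     }
 *
 *     QEMUBH *bh = qemu_bh_new(retry_dma_cb, my_state);
 *     cpu_register_map_client(bh);
 */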
2795 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2797 MemoryRegion *mr;
2798 hwaddr l, xlat;
2800 rcu_read_lock();
2801 while (len > 0) {
2802 l = len;
2803 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2804 if (!memory_access_is_direct(mr, is_write)) {
2805 l = memory_access_size(mr, l, addr);
2806 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2807 return false;
2811 len -= l;
2812 addr += l;
2814 rcu_read_unlock();
2815 return true;
2818 /* Map a physical memory region into a host virtual address.
2819 * May map a subset of the requested range, given by and returned in *plen.
2820 * May return NULL if resources needed to perform the mapping are exhausted.
2821 * Use only for reads OR writes - not for read-modify-write operations.
2822 * Use cpu_register_map_client() to know when retrying the map operation is
2823 * likely to succeed.
2825 void *address_space_map(AddressSpace *as,
2826 hwaddr addr,
2827 hwaddr *plen,
2828 bool is_write)
2830 hwaddr len = *plen;
2831 hwaddr done = 0;
2832 hwaddr l, xlat, base;
2833 MemoryRegion *mr, *this_mr;
2834 ram_addr_t raddr;
2836 if (len == 0) {
2837 return NULL;
2840 l = len;
2841 rcu_read_lock();
2842 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2844 if (!memory_access_is_direct(mr, is_write)) {
2845 if (atomic_xchg(&bounce.in_use, true)) {
2846 rcu_read_unlock();
2847 return NULL;
2849 /* Avoid unbounded allocations */
2850 l = MIN(l, TARGET_PAGE_SIZE);
2851 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2852 bounce.addr = addr;
2853 bounce.len = l;
2855 memory_region_ref(mr);
2856 bounce.mr = mr;
2857 if (!is_write) {
2858 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2859 bounce.buffer, l);
2862 rcu_read_unlock();
2863 *plen = l;
2864 return bounce.buffer;
2867 base = xlat;
2868 raddr = memory_region_get_ram_addr(mr);
2870 for (;;) {
2871 len -= l;
2872 addr += l;
2873 done += l;
2874 if (len == 0) {
2875 break;
2878 l = len;
2879 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2880 if (this_mr != mr || xlat != base + done) {
2881 break;
2885 memory_region_ref(mr);
2886 rcu_read_unlock();
2887 *plen = done;
2888 return qemu_ram_ptr_length(raddr + base, plen);
2891 /* Unmaps a memory region previously mapped by address_space_map().
2892 * Will also mark the memory as dirty if is_write == 1. access_len gives
2893 * the amount of memory that was actually read or written by the caller.
2895 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2896 int is_write, hwaddr access_len)
2898 if (buffer != bounce.buffer) {
2899 MemoryRegion *mr;
2900 ram_addr_t addr1;
2902 mr = qemu_ram_addr_from_host(buffer, &addr1);
2903 assert(mr != NULL);
2904 if (is_write) {
2905 invalidate_and_set_dirty(mr, addr1, access_len);
2907 if (xen_enabled()) {
2908 xen_invalidate_map_cache_entry(buffer);
2910 memory_region_unref(mr);
2911 return;
2913 if (is_write) {
2914 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2915 bounce.buffer, access_len);
2917 qemu_vfree(bounce.buffer);
2918 bounce.buffer = NULL;
2919 memory_region_unref(bounce.mr);
2920 atomic_mb_set(&bounce.in_use, false);
2921 cpu_notify_map_clients();
2924 void *cpu_physical_memory_map(hwaddr addr,
2925 hwaddr *plen,
2926 int is_write)
2928 return address_space_map(&address_space_memory, addr, plen, is_write);
2931 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2932 int is_write, hwaddr access_len)
2934 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
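/* Illustrative sketch (editor's addition): the usual zero-copy pattern built
 * on the two functions above: map for write, fill the host pointer, then
 * unmap with the number of bytes actually produced so dirty tracking stays
 * correct.  "dma_addr", "want" and fill_buffer() are made-up names.
 *
 *     hwaddr plen = want;
 *     void *host = address_space_map(&address_space_memory, dma_addr,
 *                                    &plen, true);
 *     if (host) {
 *         size_t done = fill_buffer(host, plen);
 *         address_space_unmap(&address_space_memory, host, plen, true, done);
 *     }
 */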
2937 /* warning: addr must be aligned */
2938 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2939 MemTxAttrs attrs,
2940 MemTxResult *result,
2941 enum device_endian endian)
2943 uint8_t *ptr;
2944 uint64_t val;
2945 MemoryRegion *mr;
2946 hwaddr l = 4;
2947 hwaddr addr1;
2948 MemTxResult r;
2949 bool release_lock = false;
2951 rcu_read_lock();
2952 mr = address_space_translate(as, addr, &addr1, &l, false);
2953 if (l < 4 || !memory_access_is_direct(mr, false)) {
2954 release_lock |= prepare_mmio_access(mr);
2956 /* I/O case */
2957 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2958 #if defined(TARGET_WORDS_BIGENDIAN)
2959 if (endian == DEVICE_LITTLE_ENDIAN) {
2960 val = bswap32(val);
2962 #else
2963 if (endian == DEVICE_BIG_ENDIAN) {
2964 val = bswap32(val);
2966 #endif
2967 } else {
2968 /* RAM case */
2969 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2970 & TARGET_PAGE_MASK)
2971 + addr1);
2972 switch (endian) {
2973 case DEVICE_LITTLE_ENDIAN:
2974 val = ldl_le_p(ptr);
2975 break;
2976 case DEVICE_BIG_ENDIAN:
2977 val = ldl_be_p(ptr);
2978 break;
2979 default:
2980 val = ldl_p(ptr);
2981 break;
2983 r = MEMTX_OK;
2985 if (result) {
2986 *result = r;
2988 if (release_lock) {
2989 qemu_mutex_unlock_iothread();
2991 rcu_read_unlock();
2992 return val;
2995 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2996 MemTxAttrs attrs, MemTxResult *result)
2998 return address_space_ldl_internal(as, addr, attrs, result,
2999 DEVICE_NATIVE_ENDIAN);
3002 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3003 MemTxAttrs attrs, MemTxResult *result)
3005 return address_space_ldl_internal(as, addr, attrs, result,
3006 DEVICE_LITTLE_ENDIAN);
3009 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3010 MemTxAttrs attrs, MemTxResult *result)
3012 return address_space_ldl_internal(as, addr, attrs, result,
3013 DEVICE_BIG_ENDIAN);
3016 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3018 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3021 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3023 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3026 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3028 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
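/* Illustrative sketch (editor's addition): reading a 32-bit little-endian
 * device register with an explicit transaction result, as opposed to the
 * fire-and-forget ldl_*_phys() wrappers above.  "REG_ADDR" is a made-up
 * constant, and returning all-ones on a failed read is just a common
 * convention, not something this file mandates.
 *
 *     MemTxResult r;
 *     uint32_t v = address_space_ldl_le(&address_space_memory, REG_ADDR,
 *                                       MEMTXATTRS_UNSPECIFIED, &r);
 *     if (r != MEMTX_OK) {
 *         v = 0xffffffff;
 *     }
 */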
3031 /* warning: addr must be aligned */
3032 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3033 MemTxAttrs attrs,
3034 MemTxResult *result,
3035 enum device_endian endian)
3037 uint8_t *ptr;
3038 uint64_t val;
3039 MemoryRegion *mr;
3040 hwaddr l = 8;
3041 hwaddr addr1;
3042 MemTxResult r;
3043 bool release_lock = false;
3045 rcu_read_lock();
3046 mr = address_space_translate(as, addr, &addr1, &l,
3047 false);
3048 if (l < 8 || !memory_access_is_direct(mr, false)) {
3049 release_lock |= prepare_mmio_access(mr);
3051 /* I/O case */
3052 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3053 #if defined(TARGET_WORDS_BIGENDIAN)
3054 if (endian == DEVICE_LITTLE_ENDIAN) {
3055 val = bswap64(val);
3057 #else
3058 if (endian == DEVICE_BIG_ENDIAN) {
3059 val = bswap64(val);
3061 #endif
3062 } else {
3063 /* RAM case */
3064 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3065 & TARGET_PAGE_MASK)
3066 + addr1);
3067 switch (endian) {
3068 case DEVICE_LITTLE_ENDIAN:
3069 val = ldq_le_p(ptr);
3070 break;
3071 case DEVICE_BIG_ENDIAN:
3072 val = ldq_be_p(ptr);
3073 break;
3074 default:
3075 val = ldq_p(ptr);
3076 break;
3078 r = MEMTX_OK;
3080 if (result) {
3081 *result = r;
3083 if (release_lock) {
3084 qemu_mutex_unlock_iothread();
3086 rcu_read_unlock();
3087 return val;
3090 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3091 MemTxAttrs attrs, MemTxResult *result)
3093 return address_space_ldq_internal(as, addr, attrs, result,
3094 DEVICE_NATIVE_ENDIAN);
3097 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3098 MemTxAttrs attrs, MemTxResult *result)
3100 return address_space_ldq_internal(as, addr, attrs, result,
3101 DEVICE_LITTLE_ENDIAN);
3104 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3105 MemTxAttrs attrs, MemTxResult *result)
3107 return address_space_ldq_internal(as, addr, attrs, result,
3108 DEVICE_BIG_ENDIAN);
3111 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3113 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3116 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3118 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3121 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3123 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3126 /* XXX: optimize */
3127 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3128 MemTxAttrs attrs, MemTxResult *result)
3130 uint8_t val;
3131 MemTxResult r;
3133 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3134 if (result) {
3135 *result = r;
3137 return val;
3140 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3142 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3145 /* warning: addr must be aligned */
3146 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3147 hwaddr addr,
3148 MemTxAttrs attrs,
3149 MemTxResult *result,
3150 enum device_endian endian)
3152 uint8_t *ptr;
3153 uint64_t val;
3154 MemoryRegion *mr;
3155 hwaddr l = 2;
3156 hwaddr addr1;
3157 MemTxResult r;
3158 bool release_lock = false;
3160 rcu_read_lock();
3161 mr = address_space_translate(as, addr, &addr1, &l,
3162 false);
3163 if (l < 2 || !memory_access_is_direct(mr, false)) {
3164 release_lock |= prepare_mmio_access(mr);
3166 /* I/O case */
3167 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3168 #if defined(TARGET_WORDS_BIGENDIAN)
3169 if (endian == DEVICE_LITTLE_ENDIAN) {
3170 val = bswap16(val);
3172 #else
3173 if (endian == DEVICE_BIG_ENDIAN) {
3174 val = bswap16(val);
3176 #endif
3177 } else {
3178 /* RAM case */
3179 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3180 & TARGET_PAGE_MASK)
3181 + addr1);
3182 switch (endian) {
3183 case DEVICE_LITTLE_ENDIAN:
3184 val = lduw_le_p(ptr);
3185 break;
3186 case DEVICE_BIG_ENDIAN:
3187 val = lduw_be_p(ptr);
3188 break;
3189 default:
3190 val = lduw_p(ptr);
3191 break;
3193 r = MEMTX_OK;
3195 if (result) {
3196 *result = r;
3198 if (release_lock) {
3199 qemu_mutex_unlock_iothread();
3201 rcu_read_unlock();
3202 return val;
3205 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3206 MemTxAttrs attrs, MemTxResult *result)
3208 return address_space_lduw_internal(as, addr, attrs, result,
3209 DEVICE_NATIVE_ENDIAN);
3212 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3213 MemTxAttrs attrs, MemTxResult *result)
3215 return address_space_lduw_internal(as, addr, attrs, result,
3216 DEVICE_LITTLE_ENDIAN);
3219 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3220 MemTxAttrs attrs, MemTxResult *result)
3222 return address_space_lduw_internal(as, addr, attrs, result,
3223 DEVICE_BIG_ENDIAN);
3226 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3228 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3231 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3233 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3236 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3238 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3241 /* warning: addr must be aligned. The RAM page is not marked as dirty
3242 and the code inside is not invalidated. It is useful when the dirty
3243 bits are used to track modified PTEs. */
3244 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3245 MemTxAttrs attrs, MemTxResult *result)
3247 uint8_t *ptr;
3248 MemoryRegion *mr;
3249 hwaddr l = 4;
3250 hwaddr addr1;
3251 MemTxResult r;
3252 uint8_t dirty_log_mask;
3253 bool release_lock = false;
3255 rcu_read_lock();
3256 mr = address_space_translate(as, addr, &addr1, &l,
3257 true);
3258 if (l < 4 || !memory_access_is_direct(mr, true)) {
3259 release_lock |= prepare_mmio_access(mr);
3261 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3262 } else {
3263 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3264 ptr = qemu_get_ram_ptr(addr1);
3265 stl_p(ptr, val);
3267 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3268 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3269 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3270 r = MEMTX_OK;
3272 if (result) {
3273 *result = r;
3275 if (release_lock) {
3276 qemu_mutex_unlock_iothread();
3278 rcu_read_unlock();
3281 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3283 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3286 /* warning: addr must be aligned */
3287 static inline void address_space_stl_internal(AddressSpace *as,
3288 hwaddr addr, uint32_t val,
3289 MemTxAttrs attrs,
3290 MemTxResult *result,
3291 enum device_endian endian)
3293 uint8_t *ptr;
3294 MemoryRegion *mr;
3295 hwaddr l = 4;
3296 hwaddr addr1;
3297 MemTxResult r;
3298 bool release_lock = false;
3300 rcu_read_lock();
3301 mr = address_space_translate(as, addr, &addr1, &l,
3302 true);
3303 if (l < 4 || !memory_access_is_direct(mr, true)) {
3304 release_lock |= prepare_mmio_access(mr);
3306 #if defined(TARGET_WORDS_BIGENDIAN)
3307 if (endian == DEVICE_LITTLE_ENDIAN) {
3308 val = bswap32(val);
3310 #else
3311 if (endian == DEVICE_BIG_ENDIAN) {
3312 val = bswap32(val);
3314 #endif
3315 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3316 } else {
3317 /* RAM case */
3318 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3319 ptr = qemu_get_ram_ptr(addr1);
3320 switch (endian) {
3321 case DEVICE_LITTLE_ENDIAN:
3322 stl_le_p(ptr, val);
3323 break;
3324 case DEVICE_BIG_ENDIAN:
3325 stl_be_p(ptr, val);
3326 break;
3327 default:
3328 stl_p(ptr, val);
3329 break;
3331 invalidate_and_set_dirty(mr, addr1, 4);
3332 r = MEMTX_OK;
3334 if (result) {
3335 *result = r;
3337 if (release_lock) {
3338 qemu_mutex_unlock_iothread();
3340 rcu_read_unlock();
3343 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3344 MemTxAttrs attrs, MemTxResult *result)
3346 address_space_stl_internal(as, addr, val, attrs, result,
3347 DEVICE_NATIVE_ENDIAN);
3350 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3351 MemTxAttrs attrs, MemTxResult *result)
3353 address_space_stl_internal(as, addr, val, attrs, result,
3354 DEVICE_LITTLE_ENDIAN);
3357 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3358 MemTxAttrs attrs, MemTxResult *result)
3360 address_space_stl_internal(as, addr, val, attrs, result,
3361 DEVICE_BIG_ENDIAN);
3364 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3366 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3369 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3371 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3374 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3376 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3379 /* XXX: optimize */
3380 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3381 MemTxAttrs attrs, MemTxResult *result)
3383 uint8_t v = val;
3384 MemTxResult r;
3386 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3387 if (result) {
3388 *result = r;
3392 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3394 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3397 /* warning: addr must be aligned */
3398 static inline void address_space_stw_internal(AddressSpace *as,
3399 hwaddr addr, uint32_t val,
3400 MemTxAttrs attrs,
3401 MemTxResult *result,
3402 enum device_endian endian)
3404 uint8_t *ptr;
3405 MemoryRegion *mr;
3406 hwaddr l = 2;
3407 hwaddr addr1;
3408 MemTxResult r;
3409 bool release_lock = false;
3411 rcu_read_lock();
3412 mr = address_space_translate(as, addr, &addr1, &l, true);
3413 if (l < 2 || !memory_access_is_direct(mr, true)) {
3414 release_lock |= prepare_mmio_access(mr);
3416 #if defined(TARGET_WORDS_BIGENDIAN)
3417 if (endian == DEVICE_LITTLE_ENDIAN) {
3418 val = bswap16(val);
3420 #else
3421 if (endian == DEVICE_BIG_ENDIAN) {
3422 val = bswap16(val);
3424 #endif
3425 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3426 } else {
3427 /* RAM case */
3428 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3429 ptr = qemu_get_ram_ptr(addr1);
3430 switch (endian) {
3431 case DEVICE_LITTLE_ENDIAN:
3432 stw_le_p(ptr, val);
3433 break;
3434 case DEVICE_BIG_ENDIAN:
3435 stw_be_p(ptr, val);
3436 break;
3437 default:
3438 stw_p(ptr, val);
3439 break;
3441 invalidate_and_set_dirty(mr, addr1, 2);
3442 r = MEMTX_OK;
3444 if (result) {
3445 *result = r;
3447 if (release_lock) {
3448 qemu_mutex_unlock_iothread();
3450 rcu_read_unlock();
3453 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3454 MemTxAttrs attrs, MemTxResult *result)
3456 address_space_stw_internal(as, addr, val, attrs, result,
3457 DEVICE_NATIVE_ENDIAN);
3460 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3461 MemTxAttrs attrs, MemTxResult *result)
3463 address_space_stw_internal(as, addr, val, attrs, result,
3464 DEVICE_LITTLE_ENDIAN);
3467 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3468 MemTxAttrs attrs, MemTxResult *result)
3470 address_space_stw_internal(as, addr, val, attrs, result,
3471 DEVICE_BIG_ENDIAN);
3474 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3476 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3479 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3481 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3484 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3486 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3489 /* XXX: optimize */
3490 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3491 MemTxAttrs attrs, MemTxResult *result)
3493 MemTxResult r;
3494 val = tswap64(val);
3495 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3496 if (result) {
3497 *result = r;
3501 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3502 MemTxAttrs attrs, MemTxResult *result)
3504 MemTxResult r;
3505 val = cpu_to_le64(val);
3506 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3507 if (result) {
3508 *result = r;
3511 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3512 MemTxAttrs attrs, MemTxResult *result)
3514 MemTxResult r;
3515 val = cpu_to_be64(val);
3516 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3517 if (result) {
3518 *result = r;
3522 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3524 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3527 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3529 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3532 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3534 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
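/* Illustrative sketch (editor's addition): the store helpers mirror the
 * loads; a caller that cares about the bus response passes a MemTxResult
 * pointer, while the st*_phys() wrappers above discard it.  "RING_BASE" is a
 * made-up address.
 *
 *     MemTxResult r;
 *     address_space_stq_le(&address_space_memory, RING_BASE, 1ULL,
 *                          MEMTXATTRS_UNSPECIFIED, &r);
 *     if (r != MEMTX_OK) {
 *         (surface the failure to the device model)
 *     }
 */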
3537 /* virtual memory access for debug (includes writing to ROM) */
3538 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3539 uint8_t *buf, int len, int is_write)
3541 int l;
3542 hwaddr phys_addr;
3543 target_ulong page;
3545 while (len > 0) {
3546 page = addr & TARGET_PAGE_MASK;
3547 phys_addr = cpu_get_phys_page_debug(cpu, page);
3548 /* if no physical page mapped, return an error */
3549 if (phys_addr == -1)
3550 return -1;
3551 l = (page + TARGET_PAGE_SIZE) - addr;
3552 if (l > len)
3553 l = len;
3554 phys_addr += (addr & ~TARGET_PAGE_MASK);
3555 if (is_write) {
3556 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3557 } else {
3558 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3559 buf, l, 0);
3561 len -= l;
3562 buf += l;
3563 addr += l;
3565 return 0;
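/* Illustrative sketch (editor's addition): a debugger-style caller reading
 * guest-virtual memory through the per-CPU translation, much as the gdb stub
 * does.  "vaddr" stands for a guest virtual address supplied by the
 * debugger.
 *
 *     uint8_t insn[4];
 *     if (cpu_memory_rw_debug(cpu, vaddr, insn, sizeof(insn), 0) < 0) {
 *         (no physical page mapped at vaddr; report an error)
 *     }
 */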
3569 /* Allows code that needs to deal with migration bitmaps etc. to still be built
3570 * target-independent. */
3572 size_t qemu_target_page_bits(void)
3574 return TARGET_PAGE_BITS;
3577 #endif
3580 * A helper function for the _utterly broken_ virtio device model to find out if
3581 * it's running on a big endian machine. Don't do this at home kids!
3583 bool target_words_bigendian(void);
3584 bool target_words_bigendian(void)
3586 #if defined(TARGET_WORDS_BIGENDIAN)
3587 return true;
3588 #else
3589 return false;
3590 #endif
3593 #ifndef CONFIG_USER_ONLY
3594 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3596 MemoryRegion *mr;
3597 hwaddr l = 1;
3598 bool res;
3600 rcu_read_lock();
3601 mr = address_space_translate(&address_space_memory,
3602 phys_addr, &phys_addr, &l, false);
3604 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3605 rcu_read_unlock();
3606 return res;
3609 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3611 RAMBlock *block;
3612 int ret = 0;
3614 rcu_read_lock();
3615 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3616 ret = func(block->idstr, block->host, block->offset,
3617 block->used_length, opaque);
3618 if (ret) {
3619 break;
3622 rcu_read_unlock();
3623 return ret;
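/* Illustrative sketch (editor's addition): a callback for
 * qemu_ram_foreach_block() that just prints every block.  The parameter list
 * follows the call made above (idstr, host pointer, offset, used length,
 * opaque); RAM_ADDR_FMT is the printf format for ram_addr_t.
 *
 *     static int dump_block(const char *name, void *host, ram_addr_t offset,
 *                           ram_addr_t length, void *opaque)
 *     {
 *         printf("%s: host %p offset " RAM_ADDR_FMT " length " RAM_ADDR_FMT
 *                "\n", name, host, offset, length);
 *         return 0;
 *     }
 *
 *     qemu_ram_foreach_block(dump_block, NULL);
 */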
3625 #endif