Use special code for sigsetjmp only in cpu-exec.c
[qemu/ar7.git] / exec.c
blob 83c3ce3efb3d993d30ffac3df1426fdb4196a3e4
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
54 #include "sysemu/qtest.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
59 #include "qemu/range.h"
60 #ifndef _WIN32
61 #include "qemu/mmap-alloc.h"
62 #endif
64 //#define DEBUG_SUBPAGE
66 #if !defined(CONFIG_USER_ONLY)
67 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
68 * are protected by the ramlist lock.
70 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
72 static MemoryRegion *system_memory;
73 static MemoryRegion *system_io;
75 AddressSpace address_space_io;
76 AddressSpace address_space_memory;
78 MemoryRegion io_mem_rom, io_mem_notdirty;
79 static MemoryRegion io_mem_unassigned;
81 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
82 #define RAM_PREALLOC (1 << 0)
84 /* RAM is mmap-ed with MAP_SHARED */
85 #define RAM_SHARED (1 << 1)
87 /* Only a portion of RAM (used_length) is actually used, and migrated.
88 * This used_length can change across reboots.
90 #define RAM_RESIZEABLE (1 << 2)
92 /* RAM is backed by an mmapped file.
94 #define RAM_FILE (1 << 3)
95 #endif
97 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
98 /* current CPU in the current thread. It is only valid inside
99 cpu_exec() */
100 __thread CPUState *current_cpu;
101 /* 0 = Do not count executed instructions.
102 1 = Precise instruction counting.
103 2 = Adaptive rate instruction counting. */
104 int use_icount;
106 #if !defined(CONFIG_USER_ONLY)
108 typedef struct PhysPageEntry PhysPageEntry;
110 struct PhysPageEntry {
111 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
112 uint32_t skip : 6;
113 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
114 uint32_t ptr : 26;
117 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
119 /* Size of the L2 (and L3, etc) page tables. */
120 #define ADDR_SPACE_BITS 64
122 #define P_L2_BITS 9
123 #define P_L2_SIZE (1 << P_L2_BITS)
125 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
127 typedef PhysPageEntry Node[P_L2_SIZE];
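/* The dispatch table is a radix tree over the physical page number: each
 * level indexes P_L2_BITS (9) bits, so with ADDR_SPACE_BITS = 64 and, for
 * example, a 4 KiB target page (TARGET_PAGE_BITS = 12) there are
 * ((64 - 12 - 1) / 9) + 1 = 6 levels.  'skip' is the number of levels an
 * entry descends (0 marks a leaf); phys_page_compact() can raise it above
 * 1 to fold out single-child interior nodes. */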
129 typedef struct PhysPageMap {
130 struct rcu_head rcu;
132 unsigned sections_nb;
133 unsigned sections_nb_alloc;
134 unsigned nodes_nb;
135 unsigned nodes_nb_alloc;
136 Node *nodes;
137 MemoryRegionSection *sections;
138 } PhysPageMap;
140 struct AddressSpaceDispatch {
141 struct rcu_head rcu;
143 /* This is a multi-level map on the physical address space.
144 * The bottom level has pointers to MemoryRegionSections.
146 PhysPageEntry phys_map;
147 PhysPageMap map;
148 AddressSpace *as;
151 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
152 typedef struct subpage_t {
153 MemoryRegion iomem;
154 AddressSpace *as;
155 hwaddr base;
156 uint16_t sub_section[TARGET_PAGE_SIZE];
157 } subpage_t;
159 #define PHYS_SECTION_UNASSIGNED 0
160 #define PHYS_SECTION_NOTDIRTY 1
161 #define PHYS_SECTION_ROM 2
162 #define PHYS_SECTION_WATCH 3
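/* These sections are registered in this fixed order by mem_begin() via
 * dummy_section(), which asserts that the indices come out as defined
 * here; the small constant indices are then encoded directly into TLB
 * iotlb values (see memory_region_section_get_iotlb()). */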
164 static void io_mem_init(void);
165 static void memory_map_init(void);
166 static void tcg_commit(MemoryListener *listener);
168 static MemoryRegion io_mem_watch;
171 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
172 * @cpu: the CPU whose AddressSpace this is
173 * @as: the AddressSpace itself
174 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
175 * @tcg_as_listener: listener for tracking changes to the AddressSpace
177 struct CPUAddressSpace {
178 CPUState *cpu;
179 AddressSpace *as;
180 struct AddressSpaceDispatch *memory_dispatch;
181 MemoryListener tcg_as_listener;
184 #endif
186 #if !defined(CONFIG_USER_ONLY)
188 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
190 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
191 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
192 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
193 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
197 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
199 unsigned i;
200 uint32_t ret;
201 PhysPageEntry e;
202 PhysPageEntry *p;
204 ret = map->nodes_nb++;
205 p = map->nodes[ret];
206 assert(ret != PHYS_MAP_NODE_NIL);
207 assert(ret != map->nodes_nb_alloc);
209 e.skip = leaf ? 0 : 1;
210 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
211 for (i = 0; i < P_L2_SIZE; ++i) {
212 memcpy(&p[i], &e, sizeof(e));
214 return ret;
217 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
218 hwaddr *index, hwaddr *nb, uint16_t leaf,
219 int level)
221 PhysPageEntry *p;
222 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
224 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
225 lp->ptr = phys_map_node_alloc(map, level == 0);
227 p = map->nodes[lp->ptr];
228 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
230 while (*nb && lp < &p[P_L2_SIZE]) {
231 if ((*index & (step - 1)) == 0 && *nb >= step) {
232 lp->skip = 0;
233 lp->ptr = leaf;
234 *index += step;
235 *nb -= step;
236 } else {
237 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
239 ++lp;
243 static void phys_page_set(AddressSpaceDispatch *d,
244 hwaddr index, hwaddr nb,
245 uint16_t leaf)
247 /* Wildly overreserve - it doesn't matter much. */
248 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
250 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
253 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
254 * and update our entry so we can skip it and go directly to the destination.
256 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
258 unsigned valid_ptr = P_L2_SIZE;
259 int valid = 0;
260 PhysPageEntry *p;
261 int i;
263 if (lp->ptr == PHYS_MAP_NODE_NIL) {
264 return;
267 p = nodes[lp->ptr];
268 for (i = 0; i < P_L2_SIZE; i++) {
269 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
270 continue;
273 valid_ptr = i;
274 valid++;
275 if (p[i].skip) {
276 phys_page_compact(&p[i], nodes, compacted);
280 /* We can only compress if there's only one child. */
281 if (valid != 1) {
282 return;
285 assert(valid_ptr < P_L2_SIZE);
287 /* Don't compress if it won't fit in the # of bits we have. */
288 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
289 return;
292 lp->ptr = p[valid_ptr].ptr;
293 if (!p[valid_ptr].skip) {
294 /* If our only child is a leaf, make this a leaf. */
295 /* By design, we should have made this node a leaf to begin with so we
296 * should never reach here.
297 * But since it's so simple to handle this, let's do it just in case we
298 * change this rule.
300 lp->skip = 0;
301 } else {
302 lp->skip += p[valid_ptr].skip;
306 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
308 DECLARE_BITMAP(compacted, nodes_nb);
310 if (d->phys_map.skip) {
311 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
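/* Path compression: after the map is built, chains of interior nodes with
 * a single valid child are folded into their parent by accumulating
 * 'skip', so phys_page_find() can descend several radix levels in one
 * step instead of walking every intermediate node. */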
315 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
316 Node *nodes, MemoryRegionSection *sections)
318 PhysPageEntry *p;
319 hwaddr index = addr >> TARGET_PAGE_BITS;
320 int i;
322 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
323 if (lp.ptr == PHYS_MAP_NODE_NIL) {
324 return &sections[PHYS_SECTION_UNASSIGNED];
326 p = nodes[lp.ptr];
327 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
330 if (sections[lp.ptr].size.hi ||
331 range_covers_byte(sections[lp.ptr].offset_within_address_space,
332 sections[lp.ptr].size.lo, addr)) {
333 return &sections[lp.ptr];
334 } else {
335 return &sections[PHYS_SECTION_UNASSIGNED];
339 bool memory_region_is_unassigned(MemoryRegion *mr)
341 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
342 && mr != &io_mem_watch;
345 /* Called from RCU critical section */
346 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
347 hwaddr addr,
348 bool resolve_subpage)
350 MemoryRegionSection *section;
351 subpage_t *subpage;
353 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
354 if (resolve_subpage && section->mr->subpage) {
355 subpage = container_of(section->mr, subpage_t, iomem);
356 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
358 return section;
361 /* Called from RCU critical section */
362 static MemoryRegionSection *
363 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
364 hwaddr *plen, bool resolve_subpage)
366 MemoryRegionSection *section;
367 MemoryRegion *mr;
368 Int128 diff;
370 section = address_space_lookup_region(d, addr, resolve_subpage);
371 /* Compute offset within MemoryRegionSection */
372 addr -= section->offset_within_address_space;
374 /* Compute offset within MemoryRegion */
375 *xlat = addr + section->offset_within_region;
377 mr = section->mr;
379 /* MMIO registers can be expected to perform full-width accesses based only
380 * on their address, without considering adjacent registers that could
381 * decode to completely different MemoryRegions. When such registers
382 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
383 * regions overlap wildly. For this reason we cannot clamp the accesses
384 * here.
386 * If the length is small (as is the case for address_space_ldl/stl),
387 * everything works fine. If the incoming length is large, however,
388 * the caller really has to do the clamping through memory_access_size.
390 if (memory_region_is_ram(mr)) {
391 diff = int128_sub(section->size, int128_make64(addr));
392 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
394 return section;
397 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
399 if (memory_region_is_ram(mr)) {
400 return !(is_write && mr->readonly);
402 if (memory_region_is_romd(mr)) {
403 return !is_write;
406 return false;
409 /* Called from RCU critical section */
410 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
411 hwaddr *xlat, hwaddr *plen,
412 bool is_write)
414 IOMMUTLBEntry iotlb;
415 MemoryRegionSection *section;
416 MemoryRegion *mr;
418 for (;;) {
419 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
420 section = address_space_translate_internal(d, addr, &addr, plen, true);
421 mr = section->mr;
423 if (!mr->iommu_ops) {
424 break;
427 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
428 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
429 | (addr & iotlb.addr_mask));
430 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
431 if (!(iotlb.perm & (1 << is_write))) {
432 mr = &io_mem_unassigned;
433 break;
436 as = iotlb.target_as;
439 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
440 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
441 *plen = MIN(page, *plen);
444 *xlat = addr;
445 return mr;
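/* Each iteration of the loop above resolves one IOMMU layer: the region's
 * translate() callback supplies a translated address and mask, *plen is
 * clamped to that IOMMU page, and the walk continues in iotlb.target_as.
 * A permission mismatch terminates the walk at io_mem_unassigned. */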
448 /* Called from RCU critical section */
449 MemoryRegionSection *
450 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
451 hwaddr *xlat, hwaddr *plen)
453 MemoryRegionSection *section;
454 section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
455 addr, xlat, plen, false);
457 assert(!section->mr->iommu_ops);
458 return section;
460 #endif
462 #if !defined(CONFIG_USER_ONLY)
464 static int cpu_common_post_load(void *opaque, int version_id)
466 CPUState *cpu = opaque;
468 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
469 version_id is increased. */
470 cpu->interrupt_request &= ~0x01;
471 tlb_flush(cpu, 1);
473 return 0;
476 static int cpu_common_pre_load(void *opaque)
478 CPUState *cpu = opaque;
480 cpu->exception_index = -1;
482 return 0;
485 static bool cpu_common_exception_index_needed(void *opaque)
487 CPUState *cpu = opaque;
489 return tcg_enabled() && cpu->exception_index != -1;
492 static const VMStateDescription vmstate_cpu_common_exception_index = {
493 .name = "cpu_common/exception_index",
494 .version_id = 1,
495 .minimum_version_id = 1,
496 .needed = cpu_common_exception_index_needed,
497 .fields = (VMStateField[]) {
498 VMSTATE_INT32(exception_index, CPUState),
499 VMSTATE_END_OF_LIST()
503 static bool cpu_common_crash_occurred_needed(void *opaque)
505 CPUState *cpu = opaque;
507 return cpu->crash_occurred;
510 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
511 .name = "cpu_common/crash_occurred",
512 .version_id = 1,
513 .minimum_version_id = 1,
514 .needed = cpu_common_crash_occurred_needed,
515 .fields = (VMStateField[]) {
516 VMSTATE_BOOL(crash_occurred, CPUState),
517 VMSTATE_END_OF_LIST()
521 const VMStateDescription vmstate_cpu_common = {
522 .name = "cpu_common",
523 .version_id = 1,
524 .minimum_version_id = 1,
525 .pre_load = cpu_common_pre_load,
526 .post_load = cpu_common_post_load,
527 .fields = (VMStateField[]) {
528 VMSTATE_UINT32(halted, CPUState),
529 VMSTATE_UINT32(interrupt_request, CPUState),
530 VMSTATE_END_OF_LIST()
532 .subsections = (const VMStateDescription*[]) {
533 &vmstate_cpu_common_exception_index,
534 &vmstate_cpu_common_crash_occurred,
535 NULL
539 #endif
541 CPUState *qemu_get_cpu(int index)
543 CPUState *cpu;
545 CPU_FOREACH(cpu) {
546 if (cpu->cpu_index == index) {
547 return cpu;
551 return NULL;
554 #if !defined(CONFIG_USER_ONLY)
555 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
557 /* We only support one address space per cpu at the moment. */
558 assert(cpu->as == as);
560 if (cpu->cpu_ases) {
561 /* We've already registered the listener for our only AS */
562 return;
565 cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
566 cpu->cpu_ases[0].cpu = cpu;
567 cpu->cpu_ases[0].as = as;
568 cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
569 memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
571 #endif
573 #ifndef CONFIG_USER_ONLY
574 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
576 static int cpu_get_free_index(Error **errp)
578 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
580 if (cpu >= MAX_CPUMASK_BITS) {
581 error_setg(errp, "Trying to use more CPUs than max of %d",
582 MAX_CPUMASK_BITS);
583 return -1;
586 bitmap_set(cpu_index_map, cpu, 1);
587 return cpu;
590 void cpu_exec_exit(CPUState *cpu)
592 if (cpu->cpu_index == -1) {
593 /* cpu_index was never allocated by this @cpu or was already freed. */
594 return;
597 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
598 cpu->cpu_index = -1;
600 #else
602 static int cpu_get_free_index(Error **errp)
604 CPUState *some_cpu;
605 int cpu_index = 0;
607 CPU_FOREACH(some_cpu) {
608 cpu_index++;
610 return cpu_index;
613 void cpu_exec_exit(CPUState *cpu)
616 #endif
618 void cpu_exec_init(CPUState *cpu, Error **errp)
620 CPUClass *cc = CPU_GET_CLASS(cpu);
621 int cpu_index;
622 Error *local_err = NULL;
624 #ifdef TARGET_WORDS_BIGENDIAN
625 cpu->bigendian = true;
626 #else
627 cpu->bigendian = false;
628 #endif
630 #ifndef CONFIG_USER_ONLY
631 cpu->as = &address_space_memory;
632 cpu->thread_id = qemu_get_thread_id();
633 #endif
635 #if defined(CONFIG_USER_ONLY)
636 cpu_list_lock();
637 #endif
638 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
639 if (local_err) {
640 error_propagate(errp, local_err);
641 #if defined(CONFIG_USER_ONLY)
642 cpu_list_unlock();
643 #endif
644 return;
646 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
647 #if defined(CONFIG_USER_ONLY)
648 cpu_list_unlock();
649 #endif
650 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
651 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
653 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
654 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
655 cpu_save, cpu_load, cpu->env_ptr);
656 assert(cc->vmsd == NULL);
657 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
658 #endif
659 if (cc->vmsd != NULL) {
660 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
664 #if defined(CONFIG_USER_ONLY)
665 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
667 tb_invalidate_phys_page_range(pc, pc + 1, 0);
669 #else
670 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
672 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
673 if (phys != -1) {
674 tb_invalidate_phys_addr(cpu->as,
675 phys | (pc & ~TARGET_PAGE_MASK));
678 #endif
680 #if defined(CONFIG_USER_ONLY)
681 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
686 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
687 int flags)
689 return -ENOSYS;
692 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
696 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
697 int flags, CPUWatchpoint **watchpoint)
699 return -ENOSYS;
701 #else
702 /* Add a watchpoint. */
703 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
704 int flags, CPUWatchpoint **watchpoint)
706 CPUWatchpoint *wp;
708 /* forbid ranges which are empty or run off the end of the address space */
709 if (len == 0 || (addr + len - 1) < addr) {
710 error_report("tried to set invalid watchpoint at %"
711 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
712 return -EINVAL;
714 wp = g_malloc(sizeof(*wp));
716 wp->vaddr = addr;
717 wp->len = len;
718 wp->flags = flags;
720 /* keep all GDB-injected watchpoints in front */
721 if (flags & BP_GDB) {
722 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
723 } else {
724 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
727 tlb_flush_page(cpu, addr);
729 if (watchpoint)
730 *watchpoint = wp;
731 return 0;
734 /* Remove a specific watchpoint. */
735 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
736 int flags)
738 CPUWatchpoint *wp;
740 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
741 if (addr == wp->vaddr && len == wp->len
742 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
743 cpu_watchpoint_remove_by_ref(cpu, wp);
744 return 0;
747 return -ENOENT;
750 /* Remove a specific watchpoint by reference. */
751 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
753 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
755 tlb_flush_page(cpu, watchpoint->vaddr);
757 g_free(watchpoint);
760 /* Remove all matching watchpoints. */
761 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
763 CPUWatchpoint *wp, *next;
765 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
766 if (wp->flags & mask) {
767 cpu_watchpoint_remove_by_ref(cpu, wp);
772 /* Return true if this watchpoint address matches the specified
773 * access (i.e. the address range covered by the watchpoint overlaps
774 * partially or completely with the address range covered by the
775 * access).
777 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
778 vaddr addr,
779 vaddr len)
781 /* We know the lengths are non-zero, but a little caution is
782 * required to avoid errors in the case where the range ends
783 * exactly at the top of the address space and so addr + len
784 * wraps round to zero.
786 vaddr wpend = wp->vaddr + wp->len - 1;
787 vaddr addrend = addr + len - 1;
789 return !(addr > wpend || wp->vaddr > addrend);
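/* Example: a 4-byte access at addr with a 1-byte watchpoint at addr + 3
 * matches, because addr <= wpend and wp->vaddr <= addrend.  Comparing the
 * inclusive end addresses is what makes the addr + len == 0 wrap-around
 * case at the top of the address space come out right. */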
792 #endif
794 /* Add a breakpoint. */
795 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
796 CPUBreakpoint **breakpoint)
798 CPUBreakpoint *bp;
800 bp = g_malloc(sizeof(*bp));
802 bp->pc = pc;
803 bp->flags = flags;
805 /* keep all GDB-injected breakpoints in front */
806 if (flags & BP_GDB) {
807 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
808 } else {
809 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
812 breakpoint_invalidate(cpu, pc);
814 if (breakpoint) {
815 *breakpoint = bp;
817 return 0;
820 /* Remove a specific breakpoint. */
821 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
823 CPUBreakpoint *bp;
825 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
826 if (bp->pc == pc && bp->flags == flags) {
827 cpu_breakpoint_remove_by_ref(cpu, bp);
828 return 0;
831 return -ENOENT;
834 /* Remove a specific breakpoint by reference. */
835 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
837 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
839 breakpoint_invalidate(cpu, breakpoint->pc);
841 g_free(breakpoint);
844 /* Remove all matching breakpoints. */
845 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
847 CPUBreakpoint *bp, *next;
849 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
850 if (bp->flags & mask) {
851 cpu_breakpoint_remove_by_ref(cpu, bp);
856 /* enable or disable single step mode. EXCP_DEBUG is returned by the
857 CPU loop after each instruction */
858 void cpu_single_step(CPUState *cpu, int enabled)
860 if (cpu->singlestep_enabled != enabled) {
861 cpu->singlestep_enabled = enabled;
862 if (kvm_enabled()) {
863 kvm_update_guest_debug(cpu, 0);
864 } else {
865 /* must flush all the translated code to avoid inconsistencies */
866 /* XXX: only flush what is necessary */
867 tb_flush(cpu);
872 void QEMU_NORETURN cpu_abort(CPUState *cpu, const char *fmt, ...)
874 va_list ap;
875 va_list ap2;
877 va_start(ap, fmt);
878 va_copy(ap2, ap);
879 fprintf(stderr, "qemu: fatal: ");
880 vfprintf(stderr, fmt, ap);
881 fprintf(stderr, "\n");
882 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
883 if (qemu_log_enabled()) {
884 qemu_log("qemu: fatal: ");
885 qemu_log_vprintf(fmt, ap2);
886 qemu_log("\n");
887 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
888 qemu_log_flush();
889 qemu_log_close();
891 va_end(ap2);
892 va_end(ap);
893 replay_finish();
894 #if defined(CONFIG_USER_ONLY)
896 struct sigaction act;
897 sigfillset(&act.sa_mask);
898 act.sa_handler = SIG_DFL;
899 sigaction(SIGABRT, &act, NULL);
901 #endif
902 abort();
905 #if !defined(CONFIG_USER_ONLY)
906 /* Called from RCU critical section */
907 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
909 RAMBlock *block;
911 block = atomic_rcu_read(&ram_list.mru_block);
912 if (block && addr - block->offset < block->max_length) {
913 return block;
915 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
916 if (addr - block->offset < block->max_length) {
917 goto found;
921 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
922 abort();
924 found:
925 /* It is safe to write mru_block outside the iothread lock. This
926 * is what happens:
928 * mru_block = xxx
929 * rcu_read_unlock()
930 * xxx removed from list
931 * rcu_read_lock()
932 * read mru_block
933 * mru_block = NULL;
934 * call_rcu(reclaim_ramblock, xxx);
935 * rcu_read_unlock()
937 * atomic_rcu_set is not needed here. The block was already published
938 * when it was placed into the list. Here we're just making an extra
939 * copy of the pointer.
941 ram_list.mru_block = block;
942 return block;
945 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
947 CPUState *cpu;
948 ram_addr_t start1;
949 RAMBlock *block;
950 ram_addr_t end;
952 end = TARGET_PAGE_ALIGN(start + length);
953 start &= TARGET_PAGE_MASK;
955 rcu_read_lock();
956 block = qemu_get_ram_block(start);
957 assert(block == qemu_get_ram_block(end - 1));
958 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
959 CPU_FOREACH(cpu) {
960 tlb_reset_dirty(cpu, start1, length);
962 rcu_read_unlock();
965 /* Note: start and end must be within the same ram block. */
966 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
967 ram_addr_t length,
968 unsigned client)
970 unsigned long end, page;
971 bool dirty;
973 if (length == 0) {
974 return false;
977 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
978 page = start >> TARGET_PAGE_BITS;
979 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
980 page, end - page);
982 if (dirty && tcg_enabled()) {
983 tlb_reset_dirty_range_all(start, length);
986 return dirty;
989 /* Called from RCU critical section */
990 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
991 MemoryRegionSection *section,
992 target_ulong vaddr,
993 hwaddr paddr, hwaddr xlat,
994 int prot,
995 target_ulong *address)
997 hwaddr iotlb;
998 CPUWatchpoint *wp;
1000 if (memory_region_is_ram(section->mr)) {
1001 /* Normal RAM. */
1002 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1003 + xlat;
1004 if (!section->readonly) {
1005 iotlb |= PHYS_SECTION_NOTDIRTY;
1006 } else {
1007 iotlb |= PHYS_SECTION_ROM;
1009 } else {
1010 AddressSpaceDispatch *d;
1012 d = atomic_rcu_read(&section->address_space->dispatch);
1013 iotlb = section - d->map.sections;
1014 iotlb += xlat;
1017 /* Make accesses to pages with watchpoints go via the
1018 watchpoint trap routines. */
1019 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1020 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1021 /* Avoid trapping reads of pages with a write breakpoint. */
1022 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1023 iotlb = PHYS_SECTION_WATCH + paddr;
1024 *address |= TLB_MMIO;
1025 break;
1030 return iotlb;
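/* The returned value does double duty: for RAM it is the page's ram_addr
 * OR-ed with a small phys-section index (NOTDIRTY or ROM) in the low bits,
 * while for MMIO it encodes the section's index in the per-AddressSpace
 * dispatch map (recovered later by iotlb_to_region()).  Pages with a
 * matching watchpoint are forced through the slow path by returning
 * PHYS_SECTION_WATCH and setting TLB_MMIO in *address. */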
1032 #endif /* defined(CONFIG_USER_ONLY) */
1034 #if !defined(CONFIG_USER_ONLY)
1036 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1037 uint16_t section);
1038 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1040 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1041 qemu_anon_ram_alloc;
1044 * Set a custom physical guest memory allocator.
1045 * Accelerators with unusual needs may need this. Hopefully, we can
1046 * get rid of it eventually.
1048 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1050 phys_mem_alloc = alloc;
1053 static uint16_t phys_section_add(PhysPageMap *map,
1054 MemoryRegionSection *section)
1056 /* The physical section number is ORed with a page-aligned
1057 * pointer to produce the iotlb entries. Thus it should
1058 * never overflow into the page-aligned value.
1060 assert(map->sections_nb < TARGET_PAGE_SIZE);
1062 if (map->sections_nb == map->sections_nb_alloc) {
1063 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1064 map->sections = g_renew(MemoryRegionSection, map->sections,
1065 map->sections_nb_alloc);
1067 map->sections[map->sections_nb] = *section;
1068 memory_region_ref(section->mr);
1069 return map->sections_nb++;
1072 static void phys_section_destroy(MemoryRegion *mr)
1074 memory_region_unref(mr);
1076 if (mr->subpage) {
1077 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1078 object_unref(OBJECT(&subpage->iomem));
1079 g_free(subpage);
1083 static void phys_sections_free(PhysPageMap *map)
1085 while (map->sections_nb > 0) {
1086 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1087 phys_section_destroy(section->mr);
1089 g_free(map->sections);
1090 g_free(map->nodes);
1093 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1095 subpage_t *subpage;
1096 hwaddr base = section->offset_within_address_space
1097 & TARGET_PAGE_MASK;
1098 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1099 d->map.nodes, d->map.sections);
1100 MemoryRegionSection subsection = {
1101 .offset_within_address_space = base,
1102 .size = int128_make64(TARGET_PAGE_SIZE),
1104 hwaddr start, end;
1106 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1108 if (!(existing->mr->subpage)) {
1109 subpage = subpage_init(d->as, base);
1110 subsection.address_space = d->as;
1111 subsection.mr = &subpage->iomem;
1112 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1113 phys_section_add(&d->map, &subsection));
1114 } else {
1115 subpage = container_of(existing->mr, subpage_t, iomem);
1117 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1118 end = start + int128_get64(section->size) - 1;
1119 subpage_register(subpage, start, end,
1120 phys_section_add(&d->map, section));
1124 static void register_multipage(AddressSpaceDispatch *d,
1125 MemoryRegionSection *section)
1127 hwaddr start_addr = section->offset_within_address_space;
1128 uint16_t section_index = phys_section_add(&d->map, section);
1129 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1130 TARGET_PAGE_BITS));
1132 assert(num_pages);
1133 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1136 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1138 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1139 AddressSpaceDispatch *d = as->next_dispatch;
1140 MemoryRegionSection now = *section, remain = *section;
1141 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1143 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1144 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1145 - now.offset_within_address_space;
1147 now.size = int128_min(int128_make64(left), now.size);
1148 register_subpage(d, &now);
1149 } else {
1150 now.size = int128_zero();
1152 while (int128_ne(remain.size, now.size)) {
1153 remain.size = int128_sub(remain.size, now.size);
1154 remain.offset_within_address_space += int128_get64(now.size);
1155 remain.offset_within_region += int128_get64(now.size);
1156 now = remain;
1157 if (int128_lt(remain.size, page_size)) {
1158 register_subpage(d, &now);
1159 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1160 now.size = page_size;
1161 register_subpage(d, &now);
1162 } else {
1163 now.size = int128_and(now.size, int128_neg(page_size));
1164 register_multipage(d, &now);
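/* A section is registered in up to three pieces: an unaligned head and an
 * unaligned tail go through register_subpage(), while the page-aligned
 * middle (size rounded down to a page multiple above) is handed to
 * register_multipage() as one run of full pages. */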
1169 void qemu_flush_coalesced_mmio_buffer(void)
1171 if (kvm_enabled())
1172 kvm_flush_coalesced_mmio_buffer();
1175 void qemu_mutex_lock_ramlist(void)
1177 qemu_mutex_lock(&ram_list.mutex);
1180 void qemu_mutex_unlock_ramlist(void)
1182 qemu_mutex_unlock(&ram_list.mutex);
1185 #ifdef __linux__
1187 #include <sys/vfs.h>
1189 #define HUGETLBFS_MAGIC 0x958458f6
1191 static long gethugepagesize(const char *path, Error **errp)
1193 struct statfs fs;
1194 int ret;
1196 do {
1197 ret = statfs(path, &fs);
1198 } while (ret != 0 && errno == EINTR);
1200 if (ret != 0) {
1201 error_setg_errno(errp, errno, "failed to get page size of file %s",
1202 path);
1203 return 0;
1206 if (!qtest_driver() &&
1207 fs.f_type != HUGETLBFS_MAGIC) {
1208 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1211 return fs.f_bsize;
1214 static void *file_ram_alloc(RAMBlock *block,
1215 ram_addr_t memory,
1216 const char *path,
1217 Error **errp)
1219 struct stat st;
1220 char *filename;
1221 char *sanitized_name;
1222 char *c;
1223 void * volatile area = NULL;
1224 int fd;
1225 uint64_t hpagesize;
1226 Error *local_err = NULL;
1228 hpagesize = gethugepagesize(path, &local_err);
1229 if (local_err) {
1230 error_propagate(errp, local_err);
1231 goto error;
1233 block->mr->align = hpagesize;
1235 if (memory < hpagesize) {
1236 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1237 "or larger than huge page size 0x%" PRIx64,
1238 memory, hpagesize);
1239 goto error;
1242 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1243 error_setg(errp,
1244 "host lacks kvm mmu notifiers, -mem-path unsupported");
1245 goto error;
1248 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1249 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1250 sanitized_name = g_strdup(memory_region_name(block->mr));
1251 for (c = sanitized_name; *c != '\0'; c++) {
1252 if (*c == '/') {
1253 *c = '_';
1257 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1258 sanitized_name);
1259 g_free(sanitized_name);
1261 fd = mkstemp(filename);
1262 if (fd >= 0) {
1263 unlink(filename);
1265 g_free(filename);
1266 } else {
1267 fd = open(path, O_RDWR | O_CREAT, 0644);
1270 if (fd < 0) {
1271 error_setg_errno(errp, errno,
1272 "unable to create backing store for hugepages");
1273 goto error;
1276 memory = ROUND_UP(memory, hpagesize);
1279 * ftruncate is not supported by hugetlbfs in older
1280 * hosts, so don't bother bailing out on errors.
1281 * If anything goes wrong with it under other filesystems,
1282 * mmap will fail.
1284 if (ftruncate(fd, memory)) {
1285 perror("ftruncate");
1288 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1289 if (area == MAP_FAILED) {
1290 error_setg_errno(errp, errno,
1291 "unable to map backing store for hugepages");
1292 close(fd);
1293 goto error;
1296 if (mem_prealloc) {
1297 os_mem_prealloc(fd, area, memory);
1300 block->fd = fd;
1301 return area;
1303 error:
1304 return NULL;
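/* Flow of the -mem-path backing above: read the filesystem page size via
 * statfs(), round the RAM size up to it, create the backing file (mkstemp()
 * plus unlink() when the path is a directory, a plain open() otherwise),
 * map it with qemu_ram_mmap(), and optionally preallocate the pages. */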
1306 #endif
1308 /* Called with the ramlist lock held. */
1309 static ram_addr_t find_ram_offset(ram_addr_t size)
1311 RAMBlock *block, *next_block;
1312 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1314 assert(size != 0); /* it would hand out same offset multiple times */
1316 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1317 return 0;
1320 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1321 ram_addr_t end, next = RAM_ADDR_MAX;
1323 end = block->offset + block->max_length;
1325 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1326 if (next_block->offset >= end) {
1327 next = MIN(next, next_block->offset);
1330 if (next - end >= size && next - end < mingap) {
1331 offset = end;
1332 mingap = next - end;
1336 if (offset == RAM_ADDR_MAX) {
1337 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1338 (uint64_t)size);
1339 abort();
1342 return offset;
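/* Best-fit search: for every existing block, measure the gap between its
 * end and the start of the next block, and remember the smallest gap that
 * still fits the new block; RAM_ADDR_MAX means no suitable gap was found
 * and the allocation aborts. */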
1345 ram_addr_t last_ram_offset(void)
1347 RAMBlock *block;
1348 ram_addr_t last = 0;
1350 rcu_read_lock();
1351 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1352 last = MAX(last, block->offset + block->max_length);
1354 rcu_read_unlock();
1355 return last;
1358 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1360 int ret;
1362 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1363 if (!machine_dump_guest_core(current_machine)) {
1364 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1365 if (ret) {
1366 perror("qemu_madvise");
1367 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1368 "but dump_guest_core=off specified\n");
1373 /* Called within an RCU critical section, or while the ramlist lock
1374 * is held.
1376 static RAMBlock *find_ram_block(ram_addr_t addr)
1378 RAMBlock *block;
1380 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1381 if (block->offset == addr) {
1382 return block;
1386 return NULL;
1389 const char *qemu_ram_get_idstr(RAMBlock *rb)
1391 return rb->idstr;
1394 /* Called with iothread lock held. */
1395 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1397 RAMBlock *new_block, *block;
1399 rcu_read_lock();
1400 new_block = find_ram_block(addr);
1401 assert(new_block);
1402 assert(!new_block->idstr[0]);
1404 if (dev) {
1405 char *id = qdev_get_dev_path(dev);
1406 if (id) {
1407 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1408 g_free(id);
1411 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1413 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1414 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1415 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1416 new_block->idstr);
1417 abort();
1420 rcu_read_unlock();
1423 /* Called with iothread lock held. */
1424 void qemu_ram_unset_idstr(ram_addr_t addr)
1426 RAMBlock *block;
1428 /* FIXME: arch_init.c assumes that this is not called throughout
1429 * migration. Ignore the problem since hot-unplug during migration
1430 * does not work anyway.
1433 rcu_read_lock();
1434 block = find_ram_block(addr);
1435 if (block) {
1436 memset(block->idstr, 0, sizeof(block->idstr));
1438 rcu_read_unlock();
1441 static int memory_try_enable_merging(void *addr, size_t len)
1443 if (!machine_mem_merge(current_machine)) {
1444 /* disabled by the user */
1445 return 0;
1448 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1451 /* Only legal before guest might have detected the memory size: e.g. on
1452 * incoming migration, or right after reset.
1454 * As the memory core doesn't know how memory is accessed, it is up to the
1455 * resize callback to update device state and/or add assertions to detect
1456 * misuse, if necessary.
1458 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1460 RAMBlock *block = find_ram_block(base);
1462 assert(block);
1464 newsize = HOST_PAGE_ALIGN(newsize);
1466 if (block->used_length == newsize) {
1467 return 0;
1470 if (!(block->flags & RAM_RESIZEABLE)) {
1471 error_setg_errno(errp, EINVAL,
1472 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1473 " in != 0x" RAM_ADDR_FMT, block->idstr,
1474 newsize, block->used_length);
1475 return -EINVAL;
1478 if (block->max_length < newsize) {
1479 error_setg_errno(errp, EINVAL,
1480 "Length too large: %s: 0x" RAM_ADDR_FMT
1481 " > 0x" RAM_ADDR_FMT, block->idstr,
1482 newsize, block->max_length);
1483 return -EINVAL;
1486 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1487 block->used_length = newsize;
1488 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1489 DIRTY_CLIENTS_ALL);
1490 memory_region_set_size(block->mr, newsize);
1491 if (block->resized) {
1492 block->resized(block->idstr, newsize, block->host);
1494 return 0;
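/* Typical pairing (sketch): a block is created with
 * qemu_ram_alloc_resizeable(size, maxsz, resized_cb, mr, &err), and later,
 * e.g. on incoming migration, qemu_ram_resize() adjusts used_length within
 * the fixed max_length, re-dirtying the block and invoking the resized
 * callback.  ('resized_cb' here is just a placeholder name for the
 * caller's callback.) */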
1497 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1499 RAMBlock *block;
1500 RAMBlock *last_block = NULL;
1501 ram_addr_t old_ram_size, new_ram_size;
1503 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1505 qemu_mutex_lock_ramlist();
1506 new_block->offset = find_ram_offset(new_block->max_length);
1508 if (!new_block->host) {
1509 if (xen_enabled()) {
1510 xen_ram_alloc(new_block->offset, new_block->max_length,
1511 new_block->mr);
1512 } else {
1513 new_block->host = phys_mem_alloc(new_block->max_length,
1514 &new_block->mr->align);
1515 if (!new_block->host) {
1516 error_setg_errno(errp, errno,
1517 "cannot set up guest memory '%s'",
1518 memory_region_name(new_block->mr));
1519 qemu_mutex_unlock_ramlist();
1520 return -1;
1522 memory_try_enable_merging(new_block->host, new_block->max_length);
1526 new_ram_size = MAX(old_ram_size,
1527 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1528 if (new_ram_size > old_ram_size) {
1529 migration_bitmap_extend(old_ram_size, new_ram_size);
1531 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1532 * QLIST (which has an RCU-friendly variant) does not have insertion at
1533 * tail, so save the last element in last_block.
1535 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1536 last_block = block;
1537 if (block->max_length < new_block->max_length) {
1538 break;
1541 if (block) {
1542 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1543 } else if (last_block) {
1544 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1545 } else { /* list is empty */
1546 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1548 ram_list.mru_block = NULL;
1550 /* Write list before version */
1551 smp_wmb();
1552 ram_list.version++;
1553 qemu_mutex_unlock_ramlist();
1555 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1557 if (new_ram_size > old_ram_size) {
1558 int i;
1560 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1561 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1562 ram_list.dirty_memory[i] =
1563 bitmap_zero_extend(ram_list.dirty_memory[i],
1564 old_ram_size, new_ram_size);
1567 cpu_physical_memory_set_dirty_range(new_block->offset,
1568 new_block->used_length,
1569 DIRTY_CLIENTS_ALL);
1571 if (new_block->host) {
1572 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1573 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1574 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1575 if (kvm_enabled()) {
1576 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1580 return new_block->offset;
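/* The block list is kept sorted largest-first so that the linear scans in
 * qemu_get_ram_block() and the mru_block cache hit the big, hot blocks
 * early; smp_wmb() orders the list insertion before the ram_list.version
 * bump, matching the "write list before version" rule relied on by
 * readers. */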
1583 #ifdef __linux__
1584 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1585 bool share, const char *mem_path,
1586 Error **errp)
1588 RAMBlock *new_block;
1589 ram_addr_t addr;
1590 Error *local_err = NULL;
1592 if (xen_enabled()) {
1593 error_setg(errp, "-mem-path not supported with Xen");
1594 return -1;
1597 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1599 * file_ram_alloc() needs to allocate just like
1600 * phys_mem_alloc, but we haven't bothered to provide
1601 * a hook there.
1603 error_setg(errp,
1604 "-mem-path not supported with this accelerator");
1605 return -1;
1608 size = HOST_PAGE_ALIGN(size);
1609 new_block = g_malloc0(sizeof(*new_block));
1610 new_block->mr = mr;
1611 new_block->used_length = size;
1612 new_block->max_length = size;
1613 new_block->flags = share ? RAM_SHARED : 0;
1614 new_block->flags |= RAM_FILE;
1615 new_block->host = file_ram_alloc(new_block, size,
1616 mem_path, errp);
1617 if (!new_block->host) {
1618 g_free(new_block);
1619 return -1;
1622 addr = ram_block_add(new_block, &local_err);
1623 if (local_err) {
1624 g_free(new_block);
1625 error_propagate(errp, local_err);
1626 return -1;
1628 return addr;
1630 #endif
1632 static
1633 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1634 void (*resized)(const char*,
1635 uint64_t length,
1636 void *host),
1637 void *host, bool resizeable,
1638 MemoryRegion *mr, Error **errp)
1640 RAMBlock *new_block;
1641 ram_addr_t addr;
1642 Error *local_err = NULL;
1644 size = HOST_PAGE_ALIGN(size);
1645 max_size = HOST_PAGE_ALIGN(max_size);
1646 new_block = g_malloc0(sizeof(*new_block));
1647 new_block->mr = mr;
1648 new_block->resized = resized;
1649 new_block->used_length = size;
1650 new_block->max_length = max_size;
1651 assert(max_size >= size);
1652 new_block->fd = -1;
1653 new_block->host = host;
1654 if (host) {
1655 new_block->flags |= RAM_PREALLOC;
1657 if (resizeable) {
1658 new_block->flags |= RAM_RESIZEABLE;
1660 addr = ram_block_add(new_block, &local_err);
1661 if (local_err) {
1662 g_free(new_block);
1663 error_propagate(errp, local_err);
1664 return -1;
1666 return addr;
1669 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1670 MemoryRegion *mr, Error **errp)
1672 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1675 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1677 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1680 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1681 void (*resized)(const char*,
1682 uint64_t length,
1683 void *host),
1684 MemoryRegion *mr, Error **errp)
1686 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1689 void qemu_ram_free_from_ptr(ram_addr_t addr)
1691 RAMBlock *block;
1693 qemu_mutex_lock_ramlist();
1694 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1695 if (addr == block->offset) {
1696 QLIST_REMOVE_RCU(block, next);
1697 ram_list.mru_block = NULL;
1698 /* Write list before version */
1699 smp_wmb();
1700 ram_list.version++;
1701 g_free_rcu(block, rcu);
1702 break;
1705 qemu_mutex_unlock_ramlist();
1708 static void reclaim_ramblock(RAMBlock *block)
1710 if (block->flags & RAM_PREALLOC) {
1712 } else if (xen_enabled()) {
1713 xen_invalidate_map_cache_entry(block->host);
1714 #ifndef _WIN32
1715 } else if (block->fd >= 0) {
1716 if (block->flags & RAM_FILE) {
1717 qemu_ram_munmap(block->host, block->max_length);
1718 } else {
1719 munmap(block->host, block->max_length);
1721 close(block->fd);
1722 #endif
1723 } else {
1724 qemu_anon_ram_free(block->host, block->max_length);
1726 g_free(block);
1729 void qemu_ram_free(ram_addr_t addr)
1731 RAMBlock *block;
1733 qemu_mutex_lock_ramlist();
1734 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1735 if (addr == block->offset) {
1736 QLIST_REMOVE_RCU(block, next);
1737 ram_list.mru_block = NULL;
1738 /* Write list before version */
1739 smp_wmb();
1740 ram_list.version++;
1741 call_rcu(block, reclaim_ramblock, rcu);
1742 break;
1745 qemu_mutex_unlock_ramlist();
1748 #ifndef _WIN32
1749 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1751 RAMBlock *block;
1752 ram_addr_t offset;
1753 int flags;
1754 void *area, *vaddr;
1756 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1757 offset = addr - block->offset;
1758 if (offset < block->max_length) {
1759 vaddr = ramblock_ptr(block, offset);
1760 if (block->flags & RAM_PREALLOC) {
1762 } else if (xen_enabled()) {
1763 abort();
1764 } else {
1765 flags = MAP_FIXED;
1766 if (block->fd >= 0) {
1767 flags |= (block->flags & RAM_SHARED ?
1768 MAP_SHARED : MAP_PRIVATE);
1769 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1770 flags, block->fd, offset);
1771 } else {
1773 * Remap needs to match alloc. Accelerators that
1774 * set phys_mem_alloc never remap. If they did,
1775 * we'd need a remap hook here.
1777 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1779 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1780 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1781 flags, -1, 0);
1783 if (area != vaddr) {
1784 fprintf(stderr, "Could not remap addr: "
1785 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1786 length, addr);
1787 exit(1);
1789 memory_try_enable_merging(vaddr, length);
1790 qemu_ram_setup_dump(vaddr, length);
1795 #endif /* !_WIN32 */
1797 int qemu_get_ram_fd(ram_addr_t addr)
1799 RAMBlock *block;
1800 int fd;
1802 rcu_read_lock();
1803 block = qemu_get_ram_block(addr);
1804 fd = block->fd;
1805 rcu_read_unlock();
1806 return fd;
1809 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1811 RAMBlock *block;
1812 void *ptr;
1814 rcu_read_lock();
1815 block = qemu_get_ram_block(addr);
1816 ptr = ramblock_ptr(block, 0);
1817 rcu_read_unlock();
1818 return ptr;
1821 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1822 * This should not be used for general purpose DMA. Use address_space_map
1823 * or address_space_rw instead. For local memory (e.g. video ram) that the
1824 * device owns, use memory_region_get_ram_ptr.
1826 * By the time this function returns, the returned pointer is not protected
1827 * by RCU anymore. If the caller is not within an RCU critical section and
1828 * does not hold the iothread lock, it must have other means of protecting the
1829 * pointer, such as a reference to the region that includes the incoming
1830 * ram_addr_t.
1832 void *qemu_get_ram_ptr(ram_addr_t addr)
1834 RAMBlock *block;
1835 void *ptr;
1837 rcu_read_lock();
1838 block = qemu_get_ram_block(addr);
1840 if (xen_enabled() && block->host == NULL) {
1841 /* We need to check if the requested address is in the RAM
1842 * because we don't want to map the entire memory in QEMU.
1843 * In that case just map until the end of the page.
1845 if (block->offset == 0) {
1846 ptr = xen_map_cache(addr, 0, 0);
1847 goto unlock;
1850 block->host = xen_map_cache(block->offset, block->max_length, 1);
1852 ptr = ramblock_ptr(block, addr - block->offset);
1854 unlock:
1855 rcu_read_unlock();
1856 return ptr;
1859 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1860 * but takes a size argument.
1862 * By the time this function returns, the returned pointer is not protected
1863 * by RCU anymore. If the caller is not within an RCU critical section and
1864 * does not hold the iothread lock, it must have other means of protecting the
1865 * pointer, such as a reference to the region that includes the incoming
1866 * ram_addr_t.
1868 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1870 void *ptr;
1871 if (*size == 0) {
1872 return NULL;
1874 if (xen_enabled()) {
1875 return xen_map_cache(addr, *size, 1);
1876 } else {
1877 RAMBlock *block;
1878 rcu_read_lock();
1879 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1880 if (addr - block->offset < block->max_length) {
1881 if (addr - block->offset + *size > block->max_length)
1882 *size = block->max_length - addr + block->offset;
1883 ptr = ramblock_ptr(block, addr - block->offset);
1884 rcu_read_unlock();
1885 return ptr;
1889 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1890 abort();
1895 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1896 * in that RAMBlock.
1898 * ptr: Host pointer to look up
1899 * round_offset: If true round the result offset down to a page boundary
1900 * *ram_addr: set to result ram_addr
1901 * *offset: set to result offset within the RAMBlock
1903 * Returns: RAMBlock (or NULL if not found)
1905 * By the time this function returns, the returned pointer is not protected
1906 * by RCU anymore. If the caller is not within an RCU critical section and
1907 * does not hold the iothread lock, it must have other means of protecting the
1908 * pointer, such as a reference to the region that includes the incoming
1909 * ram_addr_t.
1911 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1912 ram_addr_t *ram_addr,
1913 ram_addr_t *offset)
1915 RAMBlock *block;
1916 uint8_t *host = ptr;
1918 if (xen_enabled()) {
1919 rcu_read_lock();
1920 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1921 block = qemu_get_ram_block(*ram_addr);
1922 if (block) {
1923 *offset = (host - block->host);
1925 rcu_read_unlock();
1926 return block;
1929 rcu_read_lock();
1930 block = atomic_rcu_read(&ram_list.mru_block);
1931 if (block && block->host && host - block->host < block->max_length) {
1932 goto found;
1935 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1936 /* This case happens when the block is not mapped. */
1937 if (block->host == NULL) {
1938 continue;
1940 if (host - block->host < block->max_length) {
1941 goto found;
1945 rcu_read_unlock();
1946 return NULL;
1948 found:
1949 *offset = (host - block->host);
1950 if (round_offset) {
1951 *offset &= TARGET_PAGE_MASK;
1953 *ram_addr = block->offset + *offset;
1954 rcu_read_unlock();
1955 return block;
1959 * Finds the named RAMBlock
1961 * name: The name of RAMBlock to find
1963 * Returns: RAMBlock (or NULL if not found)
1965 RAMBlock *qemu_ram_block_by_name(const char *name)
1967 RAMBlock *block;
1969 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1970 if (!strcmp(name, block->idstr)) {
1971 return block;
1975 return NULL;
1978 /* Some of the softmmu routines need to translate from a host pointer
1979 (typically a TLB entry) back to a ram offset. */
1980 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1982 RAMBlock *block;
1983 ram_addr_t offset; /* Not used */
1985 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
1987 if (!block) {
1988 return NULL;
1991 return block->mr;
1994 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1995 uint64_t val, unsigned size)
1997 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1998 tb_invalidate_phys_page_fast(ram_addr, size);
2000 switch (size) {
2001 case 1:
2002 stb_p(qemu_get_ram_ptr(ram_addr), val);
2003 break;
2004 case 2:
2005 stw_p(qemu_get_ram_ptr(ram_addr), val);
2006 break;
2007 case 4:
2008 stl_p(qemu_get_ram_ptr(ram_addr), val);
2009 break;
2010 default:
2011 abort();
2013 /* Set both VGA and migration bits for simplicity and to remove
2014 * the notdirty callback faster.
2016 cpu_physical_memory_set_dirty_range(ram_addr, size,
2017 DIRTY_CLIENTS_NOCODE);
2018 /* we remove the notdirty callback only if the code has been
2019 flushed */
2020 if (!cpu_physical_memory_is_clean(ram_addr)) {
2021 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
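/* This is the write side of TCG's self-modifying-code tracking: pages that
 * still contain translated code are mapped through io_mem_notdirty, so a
 * write lands here, invalidates the affected TBs, performs the store,
 * marks the page dirty, and finally drops the trap via tlb_set_dirty()
 * once the page's translated code has been flushed. */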
2025 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2026 unsigned size, bool is_write)
2028 return is_write;
2031 static const MemoryRegionOps notdirty_mem_ops = {
2032 .write = notdirty_mem_write,
2033 .valid.accepts = notdirty_mem_accepts,
2034 .endianness = DEVICE_NATIVE_ENDIAN,
2037 /* Generate a debug exception if a watchpoint has been hit. */
2038 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2040 CPUState *cpu = current_cpu;
2041 CPUArchState *env = cpu->env_ptr;
2042 target_ulong pc, cs_base;
2043 target_ulong vaddr;
2044 CPUWatchpoint *wp;
2045 int cpu_flags;
2047 if (cpu->watchpoint_hit) {
2048 /* We re-entered the check after replacing the TB. Now raise
2049 * the debug interrupt so that it will trigger after the
2050 * current instruction. */
2051 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2052 return;
2054 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2055 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2056 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2057 && (wp->flags & flags)) {
2058 if (flags == BP_MEM_READ) {
2059 wp->flags |= BP_WATCHPOINT_HIT_READ;
2060 } else {
2061 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2063 wp->hitaddr = vaddr;
2064 wp->hitattrs = attrs;
2065 if (!cpu->watchpoint_hit) {
2066 cpu->watchpoint_hit = wp;
2067 tb_check_watchpoint(cpu);
2068 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2069 cpu->exception_index = EXCP_DEBUG;
2070 cpu_loop_exit(cpu);
2071 } else {
2072 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2073 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2074 cpu_resume_from_signal(cpu, NULL);
2077 } else {
2078 wp->flags &= ~BP_WATCHPOINT_HIT;
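/* On a hit, either the access is aborted with EXCP_DEBUG before it happens
 * (BP_STOP_BEFORE_ACCESS), or the current TB is regenerated to execute a
 * single instruction (cflags == 1) and restarted via
 * cpu_resume_from_signal(), so the debug exception is raised right after
 * the access completes. */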
2083 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2084 so these check for a hit then pass through to the normal out-of-line
2085 phys routines. */
2086 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2087 unsigned size, MemTxAttrs attrs)
2089 MemTxResult res;
2090 uint64_t data;
2092 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2093 switch (size) {
2094 case 1:
2095 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2096 break;
2097 case 2:
2098 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2099 break;
2100 case 4:
2101 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2102 break;
2103 default: abort();
2105 *pdata = data;
2106 return res;
2109 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2110 uint64_t val, unsigned size,
2111 MemTxAttrs attrs)
2113 MemTxResult res;
2115 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2116 switch (size) {
2117 case 1:
2118 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2119 break;
2120 case 2:
2121 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2122 break;
2123 case 4:
2124 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2125 break;
2126 default: abort();
2128 return res;
2131 static const MemoryRegionOps watch_mem_ops = {
2132 .read_with_attrs = watch_mem_read,
2133 .write_with_attrs = watch_mem_write,
2134 .endianness = DEVICE_NATIVE_ENDIAN,
2137 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2138 unsigned len, MemTxAttrs attrs)
2140 subpage_t *subpage = opaque;
2141 uint8_t buf[8];
2142 MemTxResult res;
2144 #if defined(DEBUG_SUBPAGE)
2145 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2146 subpage, len, addr);
2147 #endif
2148 res = address_space_read(subpage->as, addr + subpage->base,
2149 attrs, buf, len);
2150 if (res) {
2151 return res;
2153 switch (len) {
2154 case 1:
2155 *data = ldub_p(buf);
2156 return MEMTX_OK;
2157 case 2:
2158 *data = lduw_p(buf);
2159 return MEMTX_OK;
2160 case 4:
2161 *data = ldl_p(buf);
2162 return MEMTX_OK;
2163 case 8:
2164 *data = ldq_p(buf);
2165 return MEMTX_OK;
2166 default:
2167 abort();
2171 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2172 uint64_t value, unsigned len, MemTxAttrs attrs)
2174 subpage_t *subpage = opaque;
2175 uint8_t buf[8];
2177 #if defined(DEBUG_SUBPAGE)
2178 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2179 " value %"PRIx64"\n",
2180 __func__, subpage, len, addr, value);
2181 #endif
2182 switch (len) {
2183 case 1:
2184 stb_p(buf, value);
2185 break;
2186 case 2:
2187 stw_p(buf, value);
2188 break;
2189 case 4:
2190 stl_p(buf, value);
2191 break;
2192 case 8:
2193 stq_p(buf, value);
2194 break;
2195 default:
2196 abort();
2198 return address_space_write(subpage->as, addr + subpage->base,
2199 attrs, buf, len);
2202 static bool subpage_accepts(void *opaque, hwaddr addr,
2203 unsigned len, bool is_write)
2205 subpage_t *subpage = opaque;
2206 #if defined(DEBUG_SUBPAGE)
2207 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2208 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2209 #endif
2211 return address_space_access_valid(subpage->as, addr + subpage->base,
2212 len, is_write);
2215 static const MemoryRegionOps subpage_ops = {
2216 .read_with_attrs = subpage_read,
2217 .write_with_attrs = subpage_write,
2218 .impl.min_access_size = 1,
2219 .impl.max_access_size = 8,
2220 .valid.min_access_size = 1,
2221 .valid.max_access_size = 8,
2222 .valid.accepts = subpage_accepts,
2223 .endianness = DEVICE_NATIVE_ENDIAN,
2226 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2227 uint16_t section)
2229 int idx, eidx;
2231 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2232 return -1;
2233 idx = SUBPAGE_IDX(start);
2234 eidx = SUBPAGE_IDX(end);
2235 #if defined(DEBUG_SUBPAGE)
2236 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2237 __func__, mmio, start, end, idx, eidx, section);
2238 #endif
2239 for (; idx <= eidx; idx++) {
2240 mmio->sub_section[idx] = section;
2243 return 0;
2246 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2248 subpage_t *mmio;
2250 mmio = g_malloc0(sizeof(subpage_t));
2252 mmio->as = as;
2253 mmio->base = base;
2254 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2255 NULL, TARGET_PAGE_SIZE);
2256 mmio->iomem.subpage = true;
2257 #if defined(DEBUG_SUBPAGE)
2258 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2259 mmio, base, TARGET_PAGE_SIZE);
2260 #endif
2261 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2263 return mmio;
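/* Illustrative note (not from the original source): subpage_init() is used
 * when one target page is covered by more than one MemoryRegionSection.
 * For example, with a 4 KiB TARGET_PAGE_SIZE, a device at offsets
 * 0x000-0x0ff of a page and RAM at 0x100-0xfff lead the registration code
 * (region_add/mem_add in the listener below) to call subpage_register()
 * once per range with the respective section index; subpage_read() and
 * subpage_write() then re-dispatch each access through
 * address_space_read()/address_space_write() at subpage->base + addr.
 */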
2266 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2267 MemoryRegion *mr)
2269 assert(as);
2270 MemoryRegionSection section = {
2271 .address_space = as,
2272 .mr = mr,
2273 .offset_within_address_space = 0,
2274 .offset_within_region = 0,
2275 .size = int128_2_64(),
2278 return phys_section_add(map, &section);
2281 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2283 CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2284 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2285 MemoryRegionSection *sections = d->map.sections;
2287 return sections[index & ~TARGET_PAGE_MASK].mr;
2290 static void io_mem_init(void)
2292 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2293 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2294 NULL, UINT64_MAX);
2295 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2296 NULL, UINT64_MAX);
2297 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2298 NULL, UINT64_MAX);
2301 static void mem_begin(MemoryListener *listener)
2303 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2304 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2305 uint16_t n;
2307 n = dummy_section(&d->map, as, &io_mem_unassigned);
2308 assert(n == PHYS_SECTION_UNASSIGNED);
2309 n = dummy_section(&d->map, as, &io_mem_notdirty);
2310 assert(n == PHYS_SECTION_NOTDIRTY);
2311 n = dummy_section(&d->map, as, &io_mem_rom);
2312 assert(n == PHYS_SECTION_ROM);
2313 n = dummy_section(&d->map, as, &io_mem_watch);
2314 assert(n == PHYS_SECTION_WATCH);
2316 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2317 d->as = as;
2318 as->next_dispatch = d;
2321 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2323 phys_sections_free(&d->map);
2324 g_free(d);
2327 static void mem_commit(MemoryListener *listener)
2329 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2330 AddressSpaceDispatch *cur = as->dispatch;
2331 AddressSpaceDispatch *next = as->next_dispatch;
2333 phys_page_compact_all(next, next->map.nodes_nb);
2335 atomic_rcu_set(&as->dispatch, next);
2336 if (cur) {
2337 call_rcu(cur, address_space_dispatch_free, rcu);
2341 static void tcg_commit(MemoryListener *listener)
2343 CPUAddressSpace *cpuas;
2344 AddressSpaceDispatch *d;
2346 /* since each CPU stores ram addresses in its TLB cache, we must
2347 reset the modified entries */
2348 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2349 cpu_reloading_memory_map();
2350 /* The CPU and TLB are protected by the iothread lock.
2351 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2352 * may have split the RCU critical section.
2354 d = atomic_rcu_read(&cpuas->as->dispatch);
2355 cpuas->memory_dispatch = d;
2356 tlb_flush(cpuas->cpu, 1);
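/* Illustrative sketch (not part of the original file): the read side of the
 * RCU scheme used by mem_commit() and tcg_commit() above.  A reader loads the
 * current dispatch table with atomic_rcu_read() inside an RCU critical
 * section and must not cache the pointer beyond it; the writer only frees
 * the old table via call_rcu() once such readers have finished.  The helper
 * name is hypothetical.
 */
#if 0
static MemoryRegion *example_lookup_mr(AddressSpace *as, unsigned index)
{
    AddressSpaceDispatch *d;
    MemoryRegion *mr;

    rcu_read_lock();
    d = atomic_rcu_read(&as->dispatch);   /* current table, RCU-protected */
    mr = d->map.sections[index].mr;
    rcu_read_unlock();
    return mr;
}
#endif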
2359 void address_space_init_dispatch(AddressSpace *as)
2361 as->dispatch = NULL;
2362 as->dispatch_listener = (MemoryListener) {
2363 .begin = mem_begin,
2364 .commit = mem_commit,
2365 .region_add = mem_add,
2366 .region_nop = mem_add,
2367 .priority = 0,
2369 memory_listener_register(&as->dispatch_listener, as);
2372 void address_space_unregister(AddressSpace *as)
2374 memory_listener_unregister(&as->dispatch_listener);
2377 void address_space_destroy_dispatch(AddressSpace *as)
2379 AddressSpaceDispatch *d = as->dispatch;
2381 atomic_rcu_set(&as->dispatch, NULL);
2382 if (d) {
2383 call_rcu(d, address_space_dispatch_free, rcu);
2387 static void memory_map_init(void)
2389 system_memory = g_malloc(sizeof(*system_memory));
2391 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2392 address_space_init(&address_space_memory, system_memory, "memory");
2394 system_io = g_malloc(sizeof(*system_io));
2395 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2396 65536);
2397 address_space_init(&address_space_io, system_io, "I/O");
2400 MemoryRegion *get_system_memory(void)
2402 return system_memory;
2405 MemoryRegion *get_system_io(void)
2407 return system_io;
2410 #endif /* !defined(CONFIG_USER_ONLY) */
2412 /* physical memory access (slow version, mainly for debug) */
2413 #if defined(CONFIG_USER_ONLY)
2414 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2415 uint8_t *buf, int len, int is_write)
2417 int l, flags;
2418 target_ulong page;
2419 void *p;
2421 while (len > 0) {
2422 page = addr & TARGET_PAGE_MASK;
2423 l = (page + TARGET_PAGE_SIZE) - addr;
2424 if (l > len)
2425 l = len;
2426 flags = page_get_flags(page);
2427 if (!(flags & PAGE_VALID))
2428 return -1;
2429 if (is_write) {
2430 if (!(flags & PAGE_WRITE))
2431 return -1;
2432 /* XXX: this code should not depend on lock_user */
2433 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2434 return -1;
2435 memcpy(p, buf, l);
2436 unlock_user(p, addr, l);
2437 } else {
2438 if (!(flags & PAGE_READ))
2439 return -1;
2440 /* XXX: this code should not depend on lock_user */
2441 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2442 return -1;
2443 memcpy(buf, p, l);
2444 unlock_user(p, addr, 0);
2446 len -= l;
2447 buf += l;
2448 addr += l;
2450 return 0;
2453 #else
2455 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2456 hwaddr length)
2458 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2459 /* No early return if dirty_log_mask is or becomes 0, because
2460 * cpu_physical_memory_set_dirty_range will still call
2461 * xen_modified_memory.
2463 if (dirty_log_mask) {
2464 dirty_log_mask =
2465 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2467 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2468 tb_invalidate_phys_range(addr, addr + length);
2469 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2471 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2474 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2476 unsigned access_size_max = mr->ops->valid.max_access_size;
2478 /* Regions are assumed to support 1-4 byte accesses unless
2479 otherwise specified. */
2480 if (access_size_max == 0) {
2481 access_size_max = 4;
2484 /* Bound the maximum access by the alignment of the address. */
2485 if (!mr->ops->impl.unaligned) {
2486 unsigned align_size_max = addr & -addr;
2487 if (align_size_max != 0 && align_size_max < access_size_max) {
2488 access_size_max = align_size_max;
2492 /* Don't attempt accesses larger than the maximum. */
2493 if (l > access_size_max) {
2494 l = access_size_max;
2496 l = pow2floor(l);
2498 return l;
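/* Illustrative note (not from the original source): a worked example of the
 * clamping above.  For a region with valid.max_access_size == 4 that does
 * not allow unaligned accesses, an 8-byte request at addr == 0x1002 gives
 * addr & -addr == 0x2, so access_size_max drops from 4 to 2; l is then
 * bounded to 2 and pow2floor() keeps it at 2.  The caller (address_space_rw)
 * issues a 2-byte dispatch and loops for the remaining bytes.
 */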
2501 static bool prepare_mmio_access(MemoryRegion *mr)
2503 bool unlocked = !qemu_mutex_iothread_locked();
2504 bool release_lock = false;
2506 if (unlocked && mr->global_locking) {
2507 qemu_mutex_lock_iothread();
2508 unlocked = false;
2509 release_lock = true;
2511 if (mr->flush_coalesced_mmio) {
2512 if (unlocked) {
2513 qemu_mutex_lock_iothread();
2515 qemu_flush_coalesced_mmio_buffer();
2516 if (unlocked) {
2517 qemu_mutex_unlock_iothread();
2521 return release_lock;
2524 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2525 uint8_t *buf, int len, bool is_write)
2527 hwaddr l;
2528 uint8_t *ptr;
2529 uint64_t val;
2530 hwaddr addr1;
2531 MemoryRegion *mr;
2532 MemTxResult result = MEMTX_OK;
2533 bool release_lock = false;
2535 rcu_read_lock();
2536 while (len > 0) {
2537 l = len;
2538 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2540 if (is_write) {
2541 if (!memory_access_is_direct(mr, is_write)) {
2542 release_lock |= prepare_mmio_access(mr);
2543 l = memory_access_size(mr, l, addr1);
2544 /* XXX: could force current_cpu to NULL to avoid
2545 potential bugs */
2546 switch (l) {
2547 case 8:
2548 /* 64 bit write access */
2549 val = ldq_p(buf);
2550 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2551 attrs);
2552 break;
2553 case 4:
2554 /* 32 bit write access */
2555 val = ldl_p(buf);
2556 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2557 attrs);
2558 break;
2559 case 2:
2560 /* 16 bit write access */
2561 val = lduw_p(buf);
2562 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2563 attrs);
2564 break;
2565 case 1:
2566 /* 8 bit write access */
2567 val = ldub_p(buf);
2568 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2569 attrs);
2570 break;
2571 default:
2572 abort();
2574 } else {
2575 addr1 += memory_region_get_ram_addr(mr);
2576 /* RAM case */
2577 ptr = qemu_get_ram_ptr(addr1);
2578 memcpy(ptr, buf, l);
2579 invalidate_and_set_dirty(mr, addr1, l);
2581 } else {
2582 if (!memory_access_is_direct(mr, is_write)) {
2583 /* I/O case */
2584 release_lock |= prepare_mmio_access(mr);
2585 l = memory_access_size(mr, l, addr1);
2586 switch (l) {
2587 case 8:
2588 /* 64 bit read access */
2589 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2590 attrs);
2591 stq_p(buf, val);
2592 break;
2593 case 4:
2594 /* 32 bit read access */
2595 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2596 attrs);
2597 stl_p(buf, val);
2598 break;
2599 case 2:
2600 /* 16 bit read access */
2601 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2602 attrs);
2603 stw_p(buf, val);
2604 break;
2605 case 1:
2606 /* 8 bit read access */
2607 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2608 attrs);
2609 stb_p(buf, val);
2610 break;
2611 default:
2612 abort();
2614 } else {
2615 /* RAM case */
2616 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2617 memcpy(buf, ptr, l);
2621 if (release_lock) {
2622 qemu_mutex_unlock_iothread();
2623 release_lock = false;
2626 len -= l;
2627 buf += l;
2628 addr += l;
2630 rcu_read_unlock();
2632 return result;
2635 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2636 const uint8_t *buf, int len)
2638 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2641 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2642 uint8_t *buf, int len)
2644 return address_space_rw(as, addr, attrs, buf, len, false);
2648 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2649 int len, int is_write)
2651 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2652 buf, len, is_write);
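/* Illustrative sketch (not part of the original file): typical use of the
 * accessors above from device or board code.  The helper name, addresses and
 * buffer are made up for the example; only address_space_read(),
 * address_space_write() and MEMTXATTRS_UNSPECIFIED are real.
 */
#if 0
static bool example_copy_guest_word(hwaddr src, hwaddr dst)
{
    uint8_t buf[4];
    MemTxResult res;

    res = address_space_read(&address_space_memory, src,
                             MEMTXATTRS_UNSPECIFIED, buf, sizeof(buf));
    if (res != MEMTX_OK) {
        return false;              /* e.g. the access hit unassigned memory */
    }
    res = address_space_write(&address_space_memory, dst,
                              MEMTXATTRS_UNSPECIFIED, buf, sizeof(buf));
    return res == MEMTX_OK;
}
#endif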
2655 enum write_rom_type {
2656 WRITE_DATA,
2657 FLUSH_CACHE,
2660 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2661 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2663 hwaddr l;
2664 uint8_t *ptr;
2665 hwaddr addr1;
2666 MemoryRegion *mr;
2668 rcu_read_lock();
2669 while (len > 0) {
2670 l = len;
2671 mr = address_space_translate(as, addr, &addr1, &l, true);
2673 if (!(memory_region_is_ram(mr) ||
2674 memory_region_is_romd(mr))) {
2675 l = memory_access_size(mr, l, addr1);
2676 } else {
2677 addr1 += memory_region_get_ram_addr(mr);
2678 /* ROM/RAM case */
2679 ptr = qemu_get_ram_ptr(addr1);
2680 switch (type) {
2681 case WRITE_DATA:
2682 memcpy(ptr, buf, l);
2683 invalidate_and_set_dirty(mr, addr1, l);
2684 break;
2685 case FLUSH_CACHE:
2686 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2687 break;
2690 len -= l;
2691 buf += l;
2692 addr += l;
2694 rcu_read_unlock();
2697 /* used for ROM loading: can write in RAM and ROM */
2698 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2699 const uint8_t *buf, int len)
2701 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2704 void cpu_flush_icache_range(hwaddr start, int len)
2707 * This function should do the same thing as an icache flush that was
2708 * triggered from within the guest. For TCG we are always cache coherent,
2709 * so there is no need to flush anything. For KVM / Xen we need to flush
2710 * the host's instruction cache at least.
2712 if (tcg_enabled()) {
2713 return;
2716 cpu_physical_memory_write_rom_internal(&address_space_memory,
2717 start, NULL, len, FLUSH_CACHE);
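/* Illustrative sketch (not part of the original file): how a firmware blob
 * might be copied into the guest with the helper above.  Unlike
 * address_space_rw(), the ROM path writes straight into the backing RAM of
 * ROM/RAM regions (no MMIO dispatch), which is what ROM loading needs;
 * cpu_flush_icache_range() is then relevant when the guest will execute the
 * freshly written code under KVM/Xen.  The function name and arguments are
 * hypothetical.
 */
#if 0
static void example_load_firmware(AddressSpace *as, hwaddr base,
                                  const uint8_t *blob, int size)
{
    cpu_physical_memory_write_rom(as, base, blob, size);
    cpu_flush_icache_range(base, size);
}
#endif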
2720 typedef struct {
2721 MemoryRegion *mr;
2722 void *buffer;
2723 hwaddr addr;
2724 hwaddr len;
2725 bool in_use;
2726 } BounceBuffer;
2728 static BounceBuffer bounce;
2730 typedef struct MapClient {
2731 QEMUBH *bh;
2732 QLIST_ENTRY(MapClient) link;
2733 } MapClient;
2735 QemuMutex map_client_list_lock;
2736 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2737 = QLIST_HEAD_INITIALIZER(map_client_list);
2739 static void cpu_unregister_map_client_do(MapClient *client)
2741 QLIST_REMOVE(client, link);
2742 g_free(client);
2745 static void cpu_notify_map_clients_locked(void)
2747 MapClient *client;
2749 while (!QLIST_EMPTY(&map_client_list)) {
2750 client = QLIST_FIRST(&map_client_list);
2751 qemu_bh_schedule(client->bh);
2752 cpu_unregister_map_client_do(client);
2756 void cpu_register_map_client(QEMUBH *bh)
2758 MapClient *client = g_malloc(sizeof(*client));
2760 qemu_mutex_lock(&map_client_list_lock);
2761 client->bh = bh;
2762 QLIST_INSERT_HEAD(&map_client_list, client, link);
2763 if (!atomic_read(&bounce.in_use)) {
2764 cpu_notify_map_clients_locked();
2766 qemu_mutex_unlock(&map_client_list_lock);
2769 void cpu_exec_init_all(void)
2771 qemu_mutex_init(&ram_list.mutex);
2772 io_mem_init();
2773 memory_map_init();
2774 qemu_mutex_init(&map_client_list_lock);
2777 void cpu_unregister_map_client(QEMUBH *bh)
2779 MapClient *client;
2781 qemu_mutex_lock(&map_client_list_lock);
2782 QLIST_FOREACH(client, &map_client_list, link) {
2783 if (client->bh == bh) {
2784 cpu_unregister_map_client_do(client);
2785 break;
2788 qemu_mutex_unlock(&map_client_list_lock);
2791 static void cpu_notify_map_clients(void)
2793 qemu_mutex_lock(&map_client_list_lock);
2794 cpu_notify_map_clients_locked();
2795 qemu_mutex_unlock(&map_client_list_lock);
2798 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2800 MemoryRegion *mr;
2801 hwaddr l, xlat;
2803 rcu_read_lock();
2804 while (len > 0) {
2805 l = len;
2806 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2807 if (!memory_access_is_direct(mr, is_write)) {
2808 l = memory_access_size(mr, l, addr);
2809 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2810 return false;
2814 len -= l;
2815 addr += l;
2817 rcu_read_unlock();
2818 return true;
2821 /* Map a physical memory region into a host virtual address.
2822 * May map a subset of the requested range, given by and returned in *plen.
2823 * May return NULL if resources needed to perform the mapping are exhausted.
2824 * Use only for reads OR writes - not for read-modify-write operations.
2825 * Use cpu_register_map_client() to know when retrying the map operation is
2826 * likely to succeed.
2828 void *address_space_map(AddressSpace *as,
2829 hwaddr addr,
2830 hwaddr *plen,
2831 bool is_write)
2833 hwaddr len = *plen;
2834 hwaddr done = 0;
2835 hwaddr l, xlat, base;
2836 MemoryRegion *mr, *this_mr;
2837 ram_addr_t raddr;
2839 if (len == 0) {
2840 return NULL;
2843 l = len;
2844 rcu_read_lock();
2845 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2847 if (!memory_access_is_direct(mr, is_write)) {
2848 if (atomic_xchg(&bounce.in_use, true)) {
2849 rcu_read_unlock();
2850 return NULL;
2852 /* Avoid unbounded allocations */
2853 l = MIN(l, TARGET_PAGE_SIZE);
2854 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2855 bounce.addr = addr;
2856 bounce.len = l;
2858 memory_region_ref(mr);
2859 bounce.mr = mr;
2860 if (!is_write) {
2861 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2862 bounce.buffer, l);
2865 rcu_read_unlock();
2866 *plen = l;
2867 return bounce.buffer;
2870 base = xlat;
2871 raddr = memory_region_get_ram_addr(mr);
2873 for (;;) {
2874 len -= l;
2875 addr += l;
2876 done += l;
2877 if (len == 0) {
2878 break;
2881 l = len;
2882 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2883 if (this_mr != mr || xlat != base + done) {
2884 break;
2888 memory_region_ref(mr);
2889 rcu_read_unlock();
2890 *plen = done;
2891 return qemu_ram_ptr_length(raddr + base, plen);
2894 /* Unmaps a memory region previously mapped by address_space_map().
2895 * Will also mark the memory as dirty if is_write == 1. access_len gives
2896 * the amount of memory that was actually read or written by the caller.
2898 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2899 int is_write, hwaddr access_len)
2901 if (buffer != bounce.buffer) {
2902 MemoryRegion *mr;
2903 ram_addr_t addr1;
2905 mr = qemu_ram_addr_from_host(buffer, &addr1);
2906 assert(mr != NULL);
2907 if (is_write) {
2908 invalidate_and_set_dirty(mr, addr1, access_len);
2910 if (xen_enabled()) {
2911 xen_invalidate_map_cache_entry(buffer);
2913 memory_region_unref(mr);
2914 return;
2916 if (is_write) {
2917 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2918 bounce.buffer, access_len);
2920 qemu_vfree(bounce.buffer);
2921 bounce.buffer = NULL;
2922 memory_region_unref(bounce.mr);
2923 atomic_mb_set(&bounce.in_use, false);
2924 cpu_notify_map_clients();
2927 void *cpu_physical_memory_map(hwaddr addr,
2928 hwaddr *plen,
2929 int is_write)
2931 return address_space_map(&address_space_memory, addr, plen, is_write);
2934 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2935 int is_write, hwaddr access_len)
2937 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
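/* Illustrative sketch (not part of the original file): the usual
 * map/access/unmap pattern for zero-copy DMA with the functions above,
 * including the retry path when the single bounce buffer is in use.  The
 * helper name is hypothetical; retry_bh is a bottom half created by the
 * caller, as required by the cpu_register_map_client() contract.
 */
#if 0
static void example_dma_read(AddressSpace *as, hwaddr addr, hwaddr size,
                             QEMUBH *retry_bh)
{
    hwaddr plen = size;
    void *p = address_space_map(as, addr, &plen, false);

    if (!p) {
        /* Mapping resources are exhausted: ask to be notified and retry
         * from the bottom half once a buffer is free. */
        cpu_register_map_client(retry_bh);
        return;
    }
    /* ... consume up to plen bytes at p; plen may be smaller than size ... */
    address_space_unmap(as, p, plen, false, plen);
}
#endif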
2940 /* warning: addr must be aligned */
2941 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2942 MemTxAttrs attrs,
2943 MemTxResult *result,
2944 enum device_endian endian)
2946 uint8_t *ptr;
2947 uint64_t val;
2948 MemoryRegion *mr;
2949 hwaddr l = 4;
2950 hwaddr addr1;
2951 MemTxResult r;
2952 bool release_lock = false;
2954 rcu_read_lock();
2955 mr = address_space_translate(as, addr, &addr1, &l, false);
2956 if (l < 4 || !memory_access_is_direct(mr, false)) {
2957 release_lock |= prepare_mmio_access(mr);
2959 /* I/O case */
2960 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2961 #if defined(TARGET_WORDS_BIGENDIAN)
2962 if (endian == DEVICE_LITTLE_ENDIAN) {
2963 val = bswap32(val);
2965 #else
2966 if (endian == DEVICE_BIG_ENDIAN) {
2967 val = bswap32(val);
2969 #endif
2970 } else {
2971 /* RAM case */
2972 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2973 & TARGET_PAGE_MASK)
2974 + addr1);
2975 switch (endian) {
2976 case DEVICE_LITTLE_ENDIAN:
2977 val = ldl_le_p(ptr);
2978 break;
2979 case DEVICE_BIG_ENDIAN:
2980 val = ldl_be_p(ptr);
2981 break;
2982 default:
2983 val = ldl_p(ptr);
2984 break;
2986 r = MEMTX_OK;
2988 if (result) {
2989 *result = r;
2991 if (release_lock) {
2992 qemu_mutex_unlock_iothread();
2994 rcu_read_unlock();
2995 return val;
2998 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2999 MemTxAttrs attrs, MemTxResult *result)
3001 return address_space_ldl_internal(as, addr, attrs, result,
3002 DEVICE_NATIVE_ENDIAN);
3005 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3006 MemTxAttrs attrs, MemTxResult *result)
3008 return address_space_ldl_internal(as, addr, attrs, result,
3009 DEVICE_LITTLE_ENDIAN);
3012 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3013 MemTxAttrs attrs, MemTxResult *result)
3015 return address_space_ldl_internal(as, addr, attrs, result,
3016 DEVICE_BIG_ENDIAN);
3019 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3021 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3024 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3026 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3029 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3031 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
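/* Illustrative note (not part of the original file): the ld*_phys wrappers
 * above discard the MemTxResult; callers that care about bus errors should
 * use the address_space_* variants and check the result, e.g. (hypothetical
 * helper):
 */
#if 0
static uint32_t example_checked_ldl(AddressSpace *as, hwaddr addr)
{
    MemTxResult res;
    uint32_t val = address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, &res);

    return res == MEMTX_OK ? val : 0xffffffff;  /* arbitrary error value */
}
#endif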
3034 /* warning: addr must be aligned */
3035 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3036 MemTxAttrs attrs,
3037 MemTxResult *result,
3038 enum device_endian endian)
3040 uint8_t *ptr;
3041 uint64_t val;
3042 MemoryRegion *mr;
3043 hwaddr l = 8;
3044 hwaddr addr1;
3045 MemTxResult r;
3046 bool release_lock = false;
3048 rcu_read_lock();
3049 mr = address_space_translate(as, addr, &addr1, &l,
3050 false);
3051 if (l < 8 || !memory_access_is_direct(mr, false)) {
3052 release_lock |= prepare_mmio_access(mr);
3054 /* I/O case */
3055 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3056 #if defined(TARGET_WORDS_BIGENDIAN)
3057 if (endian == DEVICE_LITTLE_ENDIAN) {
3058 val = bswap64(val);
3060 #else
3061 if (endian == DEVICE_BIG_ENDIAN) {
3062 val = bswap64(val);
3064 #endif
3065 } else {
3066 /* RAM case */
3067 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3068 & TARGET_PAGE_MASK)
3069 + addr1);
3070 switch (endian) {
3071 case DEVICE_LITTLE_ENDIAN:
3072 val = ldq_le_p(ptr);
3073 break;
3074 case DEVICE_BIG_ENDIAN:
3075 val = ldq_be_p(ptr);
3076 break;
3077 default:
3078 val = ldq_p(ptr);
3079 break;
3081 r = MEMTX_OK;
3083 if (result) {
3084 *result = r;
3086 if (release_lock) {
3087 qemu_mutex_unlock_iothread();
3089 rcu_read_unlock();
3090 return val;
3093 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3094 MemTxAttrs attrs, MemTxResult *result)
3096 return address_space_ldq_internal(as, addr, attrs, result,
3097 DEVICE_NATIVE_ENDIAN);
3100 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3101 MemTxAttrs attrs, MemTxResult *result)
3103 return address_space_ldq_internal(as, addr, attrs, result,
3104 DEVICE_LITTLE_ENDIAN);
3107 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3108 MemTxAttrs attrs, MemTxResult *result)
3110 return address_space_ldq_internal(as, addr, attrs, result,
3111 DEVICE_BIG_ENDIAN);
3114 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3116 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3119 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3121 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3124 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3126 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3129 /* XXX: optimize */
3130 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3131 MemTxAttrs attrs, MemTxResult *result)
3133 uint8_t val;
3134 MemTxResult r;
3136 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3137 if (result) {
3138 *result = r;
3140 return val;
3143 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3145 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3148 /* warning: addr must be aligned */
3149 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3150 hwaddr addr,
3151 MemTxAttrs attrs,
3152 MemTxResult *result,
3153 enum device_endian endian)
3155 uint8_t *ptr;
3156 uint64_t val;
3157 MemoryRegion *mr;
3158 hwaddr l = 2;
3159 hwaddr addr1;
3160 MemTxResult r;
3161 bool release_lock = false;
3163 rcu_read_lock();
3164 mr = address_space_translate(as, addr, &addr1, &l,
3165 false);
3166 if (l < 2 || !memory_access_is_direct(mr, false)) {
3167 release_lock |= prepare_mmio_access(mr);
3169 /* I/O case */
3170 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3171 #if defined(TARGET_WORDS_BIGENDIAN)
3172 if (endian == DEVICE_LITTLE_ENDIAN) {
3173 val = bswap16(val);
3175 #else
3176 if (endian == DEVICE_BIG_ENDIAN) {
3177 val = bswap16(val);
3179 #endif
3180 } else {
3181 /* RAM case */
3182 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3183 & TARGET_PAGE_MASK)
3184 + addr1);
3185 switch (endian) {
3186 case DEVICE_LITTLE_ENDIAN:
3187 val = lduw_le_p(ptr);
3188 break;
3189 case DEVICE_BIG_ENDIAN:
3190 val = lduw_be_p(ptr);
3191 break;
3192 default:
3193 val = lduw_p(ptr);
3194 break;
3196 r = MEMTX_OK;
3198 if (result) {
3199 *result = r;
3201 if (release_lock) {
3202 qemu_mutex_unlock_iothread();
3204 rcu_read_unlock();
3205 return val;
3208 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3209 MemTxAttrs attrs, MemTxResult *result)
3211 return address_space_lduw_internal(as, addr, attrs, result,
3212 DEVICE_NATIVE_ENDIAN);
3215 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3216 MemTxAttrs attrs, MemTxResult *result)
3218 return address_space_lduw_internal(as, addr, attrs, result,
3219 DEVICE_LITTLE_ENDIAN);
3222 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3223 MemTxAttrs attrs, MemTxResult *result)
3225 return address_space_lduw_internal(as, addr, attrs, result,
3226 DEVICE_BIG_ENDIAN);
3229 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3231 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3234 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3236 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3239 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3241 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3244 /* warning: addr must be aligned. The ram page is not marked as dirty
3245 and the code inside is not invalidated. It is useful if the dirty
3246 bits are used to track modified PTEs */
3247 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3248 MemTxAttrs attrs, MemTxResult *result)
3250 uint8_t *ptr;
3251 MemoryRegion *mr;
3252 hwaddr l = 4;
3253 hwaddr addr1;
3254 MemTxResult r;
3255 uint8_t dirty_log_mask;
3256 bool release_lock = false;
3258 rcu_read_lock();
3259 mr = address_space_translate(as, addr, &addr1, &l,
3260 true);
3261 if (l < 4 || !memory_access_is_direct(mr, true)) {
3262 release_lock |= prepare_mmio_access(mr);
3264 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3265 } else {
3266 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3267 ptr = qemu_get_ram_ptr(addr1);
3268 stl_p(ptr, val);
3270 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3271 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3272 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3273 r = MEMTX_OK;
3275 if (result) {
3276 *result = r;
3278 if (release_lock) {
3279 qemu_mutex_unlock_iothread();
3281 rcu_read_unlock();
3284 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3286 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3289 /* warning: addr must be aligned */
3290 static inline void address_space_stl_internal(AddressSpace *as,
3291 hwaddr addr, uint32_t val,
3292 MemTxAttrs attrs,
3293 MemTxResult *result,
3294 enum device_endian endian)
3296 uint8_t *ptr;
3297 MemoryRegion *mr;
3298 hwaddr l = 4;
3299 hwaddr addr1;
3300 MemTxResult r;
3301 bool release_lock = false;
3303 rcu_read_lock();
3304 mr = address_space_translate(as, addr, &addr1, &l,
3305 true);
3306 if (l < 4 || !memory_access_is_direct(mr, true)) {
3307 release_lock |= prepare_mmio_access(mr);
3309 #if defined(TARGET_WORDS_BIGENDIAN)
3310 if (endian == DEVICE_LITTLE_ENDIAN) {
3311 val = bswap32(val);
3313 #else
3314 if (endian == DEVICE_BIG_ENDIAN) {
3315 val = bswap32(val);
3317 #endif
3318 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3319 } else {
3320 /* RAM case */
3321 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3322 ptr = qemu_get_ram_ptr(addr1);
3323 switch (endian) {
3324 case DEVICE_LITTLE_ENDIAN:
3325 stl_le_p(ptr, val);
3326 break;
3327 case DEVICE_BIG_ENDIAN:
3328 stl_be_p(ptr, val);
3329 break;
3330 default:
3331 stl_p(ptr, val);
3332 break;
3334 invalidate_and_set_dirty(mr, addr1, 4);
3335 r = MEMTX_OK;
3337 if (result) {
3338 *result = r;
3340 if (release_lock) {
3341 qemu_mutex_unlock_iothread();
3343 rcu_read_unlock();
3346 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3347 MemTxAttrs attrs, MemTxResult *result)
3349 address_space_stl_internal(as, addr, val, attrs, result,
3350 DEVICE_NATIVE_ENDIAN);
3353 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3354 MemTxAttrs attrs, MemTxResult *result)
3356 address_space_stl_internal(as, addr, val, attrs, result,
3357 DEVICE_LITTLE_ENDIAN);
3360 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3361 MemTxAttrs attrs, MemTxResult *result)
3363 address_space_stl_internal(as, addr, val, attrs, result,
3364 DEVICE_BIG_ENDIAN);
3367 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3369 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3372 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3374 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3377 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3379 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3382 /* XXX: optimize */
3383 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3384 MemTxAttrs attrs, MemTxResult *result)
3386 uint8_t v = val;
3387 MemTxResult r;
3389 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3390 if (result) {
3391 *result = r;
3395 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3397 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3400 /* warning: addr must be aligned */
3401 static inline void address_space_stw_internal(AddressSpace *as,
3402 hwaddr addr, uint32_t val,
3403 MemTxAttrs attrs,
3404 MemTxResult *result,
3405 enum device_endian endian)
3407 uint8_t *ptr;
3408 MemoryRegion *mr;
3409 hwaddr l = 2;
3410 hwaddr addr1;
3411 MemTxResult r;
3412 bool release_lock = false;
3414 rcu_read_lock();
3415 mr = address_space_translate(as, addr, &addr1, &l, true);
3416 if (l < 2 || !memory_access_is_direct(mr, true)) {
3417 release_lock |= prepare_mmio_access(mr);
3419 #if defined(TARGET_WORDS_BIGENDIAN)
3420 if (endian == DEVICE_LITTLE_ENDIAN) {
3421 val = bswap16(val);
3423 #else
3424 if (endian == DEVICE_BIG_ENDIAN) {
3425 val = bswap16(val);
3427 #endif
3428 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3429 } else {
3430 /* RAM case */
3431 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3432 ptr = qemu_get_ram_ptr(addr1);
3433 switch (endian) {
3434 case DEVICE_LITTLE_ENDIAN:
3435 stw_le_p(ptr, val);
3436 break;
3437 case DEVICE_BIG_ENDIAN:
3438 stw_be_p(ptr, val);
3439 break;
3440 default:
3441 stw_p(ptr, val);
3442 break;
3444 invalidate_and_set_dirty(mr, addr1, 2);
3445 r = MEMTX_OK;
3447 if (result) {
3448 *result = r;
3450 if (release_lock) {
3451 qemu_mutex_unlock_iothread();
3453 rcu_read_unlock();
3456 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3457 MemTxAttrs attrs, MemTxResult *result)
3459 address_space_stw_internal(as, addr, val, attrs, result,
3460 DEVICE_NATIVE_ENDIAN);
3463 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3464 MemTxAttrs attrs, MemTxResult *result)
3466 address_space_stw_internal(as, addr, val, attrs, result,
3467 DEVICE_LITTLE_ENDIAN);
3470 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3471 MemTxAttrs attrs, MemTxResult *result)
3473 address_space_stw_internal(as, addr, val, attrs, result,
3474 DEVICE_BIG_ENDIAN);
3477 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3479 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3482 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3484 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3487 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3489 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3492 /* XXX: optimize */
3493 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3494 MemTxAttrs attrs, MemTxResult *result)
3496 MemTxResult r;
3497 val = tswap64(val);
3498 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3499 if (result) {
3500 *result = r;
3504 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3505 MemTxAttrs attrs, MemTxResult *result)
3507 MemTxResult r;
3508 val = cpu_to_le64(val);
3509 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3510 if (result) {
3511 *result = r;
3514 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3515 MemTxAttrs attrs, MemTxResult *result)
3517 MemTxResult r;
3518 val = cpu_to_be64(val);
3519 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3520 if (result) {
3521 *result = r;
3525 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3527 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3530 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3532 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3535 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3537 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3540 /* virtual memory access for debug (includes writing to ROM) */
3541 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3542 uint8_t *buf, int len, int is_write)
3544 int l;
3545 hwaddr phys_addr;
3546 target_ulong page;
3548 while (len > 0) {
3549 page = addr & TARGET_PAGE_MASK;
3550 phys_addr = cpu_get_phys_page_debug(cpu, page);
3551 /* if no physical page mapped, return an error */
3552 if (phys_addr == -1)
3553 return -1;
3554 l = (page + TARGET_PAGE_SIZE) - addr;
3555 if (l > len)
3556 l = len;
3557 phys_addr += (addr & ~TARGET_PAGE_MASK);
3558 if (is_write) {
3559 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3560 } else {
3561 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3562 buf, l, 0);
3564 len -= l;
3565 buf += l;
3566 addr += l;
3568 return 0;
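/* Illustrative sketch (not part of the original file): a debugger-style read
 * of guest virtual memory through the routine above.  Translation uses
 * cpu_get_phys_page_debug(), so it reflects the CPU's current MMU state, and
 * debug writes may also land in ROM.  The helper name is hypothetical.
 */
#if 0
static bool example_read_guest_u32(CPUState *cpu, target_ulong vaddr,
                                   uint32_t *out)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(cpu, vaddr, buf, sizeof(buf), 0) < 0) {
        return false;               /* no physical page mapped at vaddr */
    }
    *out = ldl_p(buf);              /* target-endian load, as used above */
    return true;
}
#endif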
3572 * Allows code that needs to deal with migration bitmaps etc. to still be built
3573 * target-independent.
3575 size_t qemu_target_page_bits(void)
3577 return TARGET_PAGE_BITS;
3580 #endif
3583 * A helper function for the _utterly broken_ virtio device model to find out if
3584 * it's running on a big endian machine. Don't do this at home kids!
3586 bool target_words_bigendian(void);
3587 bool target_words_bigendian(void)
3589 #if defined(TARGET_WORDS_BIGENDIAN)
3590 return true;
3591 #else
3592 return false;
3593 #endif
3596 #ifndef CONFIG_USER_ONLY
3597 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3599 MemoryRegion *mr;
3600 hwaddr l = 1;
3601 bool res;
3603 rcu_read_lock();
3604 mr = address_space_translate(&address_space_memory,
3605 phys_addr, &phys_addr, &l, false);
3607 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3608 rcu_read_unlock();
3609 return res;
3612 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3614 RAMBlock *block;
3615 int ret = 0;
3617 rcu_read_lock();
3618 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3619 ret = func(block->idstr, block->host, block->offset,
3620 block->used_length, opaque);
3621 if (ret) {
3622 break;
3625 rcu_read_unlock();
3626 return ret;
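/* Illustrative sketch (not part of the original file): a callback for
 * qemu_ram_foreach_block() above.  The parameter list mirrors the call site
 * (idstr, host pointer, offset, used length, opaque); the exact
 * RAMBlockIterFunc typedef lives in the headers, and the output format is
 * only an example.
 */
#if 0
static int example_print_block(const char *idstr, void *host_addr,
                               ram_addr_t offset, ram_addr_t length,
                               void *opaque)
{
    fprintf(stderr, "block %s: host %p offset 0x%llx length 0x%llx\n",
            idstr, host_addr,
            (unsigned long long)offset, (unsigned long long)length);
    return 0;   /* returning non-zero stops the iteration */
}
#endif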
3628 #endif