exec.c

   1 /*
   2  *  Virtual page mapping
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "config.h"
  20 #ifndef _WIN32
  21 #include <sys/types.h>
  22 #include <sys/mman.h>
  23 #endif
  24
  25 #include "qemu-common.h"
  26 #include "cpu.h"
  27 #include "tcg.h"
  28 #include "hw/hw.h"
  29 #include "hw/qdev.h"
  30 #include "qemu/osdep.h"
  31 #include "sysemu/kvm.h"
  32 #include "sysemu/sysemu.h"
  33 #include "hw/xen/xen.h"
  34 #include "qemu/timer.h"
  35 #include "qemu/config-file.h"
  36 #include "qemu/error-report.h"
  37 #include "exec/memory.h"
  38 #include "sysemu/dma.h"
  39 #include "exec/address-spaces.h"
  40 #if defined(CONFIG_USER_ONLY)
  41 #include <qemu.h>
  42 #else /* !CONFIG_USER_ONLY */
  43 #include "sysemu/xen-mapcache.h"
  44 #include "trace.h"
  45 #endif
  46 #include "exec/cpu-all.h"
  47 #include "qemu/rcu_queue.h"
  48 #include "exec/cputlb.h"
  49 #include "translate-all.h"
  50
  51 #include "exec/memory-internal.h"
  52 #include "exec/ram_addr.h"
  53
  54 #include "qemu/range.h"
  55
  56 //#define DEBUG_SUBPAGE
  57
  58 #if !defined(CONFIG_USER_ONLY)
  /* Dirty-tracking switch; written by cpu_physical_memory_set_dirty_tracking(). */
  59 static bool in_migration;
  60
  61 /* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
  62  * are protected by the ramlist lock.
  63  */
  64 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
  65
       /* Root regions. NOTE(review): presumably set up by memory_map_init(),
        * which is only declared in this part of the file - confirm.
        */
  66 static MemoryRegion *system_memory;
  67 static MemoryRegion *system_io;
  68
       /* Global address spaces; cpu_exec_init() points every CPU at
        * address_space_memory in system-emulation builds.
        */
  69 AddressSpace address_space_io;
  70 AddressSpace address_space_memory;
  71
       /* Special regions.  memory_region_is_unassigned() reports io_mem_rom and
        * io_mem_notdirty as assigned; address_space_translate() returns
        * io_mem_unassigned when an IOMMU refuses the requested access.
        */
  72 MemoryRegion io_mem_rom, io_mem_notdirty;
  73 static MemoryRegion io_mem_unassigned;
  74
  75 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
  76 #define RAM_PREALLOC   (1 << 0)
  77
  78 /* RAM is mmap-ed with MAP_SHARED */
  79 #define RAM_SHARED     (1 << 1)
  80
  81 /* Only a portion of RAM (used_length) is actually used, and migrated.
  82  * This used_length size can change across reboots.
  83  */
  84 #define RAM_RESIZEABLE (1 << 2)
  85
  86 #endif
  87
  /* List of all CPUs.  cpu_exec_init() appends each new CPU to the tail and
   * qemu_get_cpu() searches it by cpu_index.
   */
  88 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
  89 /* current CPU in the current thread. It is only valid inside
  90    cpu_exec() */
  91 DEFINE_TLS(CPUState *, current_cpu);
  92 /* 0 = Do not count executed instructions.
  93    1 = Precise instruction counting.
  94    2 = Adaptive rate instruction counting.  */
  95 int use_icount;
  96
  97 #if !defined(CONFIG_USER_ONLY)
  98
  /* One slot of the physical page radix tree.  Both fields are packed into a
   * single 32-bit word (6 + 26 bits).
   */
  99 typedef struct PhysPageEntry PhysPageEntry;
 100
 101 struct PhysPageEntry {
 102     /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
 103     uint32_t skip : 6;
 104      /* index into phys_sections (!skip) or phys_map_nodes (skip) */
 105     uint32_t ptr : 26;
 106 };
 107
      /* All-ones value of the 26-bit ptr field.  phys_map_node_alloc() stores it
       * in every slot of a new node, and phys_page_find() treats a non-leaf
       * entry carrying it as unassigned.
       */
 108 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
 109
 110 /* Size of the L2 (and L3, etc) page tables.  */
 111 #define ADDR_SPACE_BITS 64
 112
      /* Each tree level is indexed with P_L2_BITS bits of the page index, so a
       * node holds P_L2_SIZE entries.
       */
 113 #define P_L2_BITS 9
 114 #define P_L2_SIZE (1 << P_L2_BITS)
 115
      /* Number of levels needed to cover the ADDR_SPACE_BITS - TARGET_PAGE_BITS
       * bits of a page index, P_L2_BITS at a time (rounded up).
       */
 116 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
 117
      /* One radix-tree node: a table of P_L2_SIZE entries. */
 118 typedef PhysPageEntry Node[P_L2_SIZE];
 119
 /* Backing storage for one radix tree: a growable array of nodes and a
  * growable array of MemoryRegionSections.
  */
 120 typedef struct PhysPageMap {
          /* NOTE(review): presumably used for deferred freeing; not used in
           * the visible part of the file - confirm.
           */
 121     struct rcu_head rcu;
 122
          /* Sections in use / allocated capacity (see phys_section_add()). */
 123     unsigned sections_nb;
 124     unsigned sections_nb_alloc;
          /* Nodes in use / allocated capacity (see phys_map_node_reserve()). */
 125     unsigned nodes_nb;
 126     unsigned nodes_nb_alloc;
 127     Node *nodes;
 128     MemoryRegionSection *sections;
 129 } PhysPageMap;
 130
 /* Lookup structure for one address space: the radix-tree root plus the
  * storage (nodes and sections) the tree indexes into.
  */
 131 struct AddressSpaceDispatch {
 132     struct rcu_head rcu;
 133
 134     /* This is a multi-level map on the physical address space.
 135      * The bottom level has pointers to MemoryRegionSections.
 136      */
 137     PhysPageEntry phys_map;
 138     PhysPageMap map;
          /* Address space this dispatch belongs to; register_subpage() passes it
           * to subpage_init() and stores it in the sections it creates.
           */
 139     AddressSpace *as;
 140 };
 141
 /* Byte offset of addr inside its target page. */
 142 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 /* A page that is shared by several sections.  address_space_lookup_region()
  * recovers the subpage_t from its embedded iomem region and uses
  * sub_section[SUBPAGE_IDX(addr)] as an index into the section array.
  */
 143 typedef struct subpage_t {
 144     MemoryRegion iomem;
 145     AddressSpace *as;
          /* NOTE(review): presumably the page-aligned base address handed to
           * subpage_init() - confirm against its definition.
           */
 146     hwaddr base;
          /* One section index per byte of the page. */
 147     uint16_t sub_section[TARGET_PAGE_SIZE];
 148 } subpage_t;
 149
 /* Fixed section indices.  phys_page_find() falls back to
  * PHYS_SECTION_UNASSIGNED; memory_region_section_get_iotlb() folds the
  * NOTDIRTY/ROM/WATCH values into the iotlb value it returns.
  */
 150 #define PHYS_SECTION_UNASSIGNED 0
 151 #define PHYS_SECTION_NOTDIRTY 1
 152 #define PHYS_SECTION_ROM 2
 153 #define PHYS_SECTION_WATCH 3
 154
      /* Defined later in the file; called from cpu_exec_init_all(). */
 155 static void io_mem_init(void);
 156 static void memory_map_init(void);
      /* Installed as the listener commit hook by tcg_cpu_address_space_init(). */
 157 static void tcg_commit(MemoryListener *listener);
 158
      /* Region that memory_region_is_unassigned() never reports as unassigned. */
 159 static MemoryRegion io_mem_watch;
 160 #endif
 161
 162 #if !defined(CONFIG_USER_ONLY)
 163
 164 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 165 {
 166     if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 167         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
 168         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 169         map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 170     }
 171 }
 172
 173 static uint32_t phys_map_node_alloc(PhysPageMap *map)
 174 {
 175     unsigned i;
 176     uint32_t ret;
 177
 178     ret = map->nodes_nb++;
 179     assert(ret != PHYS_MAP_NODE_NIL);
 180     assert(ret != map->nodes_nb_alloc);
 181     for (i = 0; i < P_L2_SIZE; ++i) {
 182         map->nodes[ret][i].skip = 1;
 183         map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
 184     }
 185     return ret;
 186 }
 187
 188 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 189                                 hwaddr *index, hwaddr *nb, uint16_t leaf,
 190                                 int level)
 191 {
 192     PhysPageEntry *p;
 193     int i;
 194     hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 195
 196     if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 197         lp->ptr = phys_map_node_alloc(map);
 198         p = map->nodes[lp->ptr];
 199         if (level == 0) {
 200             for (i = 0; i < P_L2_SIZE; i++) {
 201                 p[i].skip = 0;
 202                 p[i].ptr = PHYS_SECTION_UNASSIGNED;
 203             }
 204         }
 205     } else {
 206         p = map->nodes[lp->ptr];
 207     }
 208     lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 209
 210     while (*nb && lp < &p[P_L2_SIZE]) {
 211         if ((*index & (step - 1)) == 0 && *nb >= step) {
 212             lp->skip = 0;
 213             lp->ptr = leaf;
 214             *index += step;
 215             *nb -= step;
 216         } else {
 217             phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 218         }
 219         ++lp;
 220     }
 221 }
 222
 223 static void phys_page_set(AddressSpaceDispatch *d,
 224                           hwaddr index, hwaddr nb,
 225                           uint16_t leaf)
 226 {
 227     /* Wildly overreserve - it doesn't matter much. */
 228     phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 229
 230     phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 231 }
 232
 233 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
 234  * and update our entry so we can skip it and go directly to the destination.
      *
      * @lp:        entry to compact; its ptr/skip fields may be rewritten
      * @nodes:     node array that lp->ptr and all child pointers index into
      * @compacted: only forwarded to the recursive calls; this function never
      *             reads or writes it
 235  */
 236 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
 237 {
          /* P_L2_SIZE is an out-of-range sentinel meaning "no valid child seen". */
 238     unsigned valid_ptr = P_L2_SIZE;
 239     int valid = 0;
 240     PhysPageEntry *p;
 241     int i;
 242
 243     if (lp->ptr == PHYS_MAP_NODE_NIL) {
 244         return;
 245     }
 246
 247     p = nodes[lp->ptr];
          /* Count the populated children, remember the last one, and compact
           * every non-leaf child first (depth-first).
           */
 248     for (i = 0; i < P_L2_SIZE; i++) {
 249         if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 250             continue;
 251         }
 252
 253         valid_ptr = i;
 254         valid++;
 255         if (p[i].skip) {
 256             phys_page_compact(&p[i], nodes, compacted);
 257         }
 258     }
 259
 260     /* We can only compress if there's only one child. */
 261     if (valid != 1) {
 262         return;
 263     }
 264
 265     assert(valid_ptr < P_L2_SIZE);
 266
 267     /* Don't compress if it won't fit in the # of bits we have. */
          /* NOTE(review): the skip bit-field is 6 bits wide but the limit tested
           * here is 1 << 3; confirm that the tighter bound is intentional.
           */
 268     if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 269         return;
 270     }
 271
          /* Bypass the single-child node: point straight at what the child
           * points at and add up the number of levels skipped.
           */
 272     lp->ptr = p[valid_ptr].ptr;
 273     if (!p[valid_ptr].skip) {
 274         /* If our only child is a leaf, make this a leaf. */
 275         /* By design, we should have made this node a leaf to begin with so we
 276          * should never reach here.
 277          * But since it's so simple to handle this, let's do it just in case we
 278          * change this rule.
 279          */
 280         lp->skip = 0;
 281     } else {
 282         lp->skip += p[valid_ptr].skip;
 283     }
 284 }
 285
 286 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 287 {
 288     DECLARE_BITMAP(compacted, nodes_nb);
 289
 290     if (d->phys_map.skip) {
 291         phys_page_compact(&d->phys_map, d->map.nodes, compacted);
 292     }
 293 }
 294
 /* Look up the section that covers @addr.
  *
  * @lp:       root entry of the radix tree (passed by value and reused as
  *            the walk cursor)
  * @addr:     address to look up
  * @nodes:    node array the tree entries index into
  * @sections: section array the leaf entries index into
  *
  * Returns a pointer into @sections; &sections[PHYS_SECTION_UNASSIGNED] when
  * the walk hits an empty entry or the section found does not cover @addr.
  */
 295 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 296                                            Node *nodes, MemoryRegionSection *sections)
 297 {
 298     PhysPageEntry *p;
 299     hwaddr index = addr >> TARGET_PAGE_BITS;
 300     int i;
 301
          /* i is the level still to be decoded.  Each step first drops by
           * lp.skip levels (more than one after compaction) and then indexes
           * the child node with the P_L2_BITS of the page number belonging to
           * that level.  The walk ends at a leaf (skip == 0).
           */
 302     for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 303         if (lp.ptr == PHYS_MAP_NODE_NIL) {
 304             return &sections[PHYS_SECTION_UNASSIGNED];
 305         }
 306         p = nodes[lp.ptr];
 307         lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 308     }
 309
          /* Compacted paths skip address bits, so check that the section really
           * contains addr.  NOTE(review): a non-zero size.hi presumably means
           * the section is too large to miss any 64-bit address - confirm.
           */
 310     if (sections[lp.ptr].size.hi ||
 311         range_covers_byte(sections[lp.ptr].offset_within_address_space,
 312                           sections[lp.ptr].size.lo, addr)) {
 313         return &sections[lp.ptr];
 314     } else {
 315         return &sections[PHYS_SECTION_UNASSIGNED];
 316     }
 317 }
 318
 319 bool memory_region_is_unassigned(MemoryRegion *mr)
 320 {
 321     return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 322         && mr != &io_mem_watch;
 323 }
 324
 325 /* Called from RCU critical section */
 326 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 327                                                         hwaddr addr,
 328                                                         bool resolve_subpage)
 329 {
 330     MemoryRegionSection *section;
 331     subpage_t *subpage;
 332
 333     section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
 334     if (resolve_subpage && section->mr->subpage) {
 335         subpage = container_of(section->mr, subpage_t, iomem);
 336         section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 337     }
 338     return section;
 339 }
 340
 341 /* Called from RCU critical section */
 342 static MemoryRegionSection *
 343 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 344                                  hwaddr *plen, bool resolve_subpage)
 345 {
 346     MemoryRegionSection *section;
 347     Int128 diff;
 348
 349     section = address_space_lookup_region(d, addr, resolve_subpage);
 350     /* Compute offset within MemoryRegionSection */
 351     addr -= section->offset_within_address_space;
 352
 353     /* Compute offset within MemoryRegion */
 354     *xlat = addr + section->offset_within_region;
 355
 356     diff = int128_sub(section->mr->size, int128_make64(addr));
 357     *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 358     return section;
 359 }
 360
 361 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
 362 {
 363     if (memory_region_is_ram(mr)) {
 364         return !(is_write && mr->readonly);
 365     }
 366     if (memory_region_is_romd(mr)) {
 367         return !is_write;
 368     }
 369
 370     return false;
 371 }
 372
 373 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 374                                       hwaddr *xlat, hwaddr *plen,
 375                                       bool is_write)
 376 {
 377     IOMMUTLBEntry iotlb;
 378     MemoryRegionSection *section;
 379     MemoryRegion *mr;
 380     hwaddr len = *plen;
 381
 382     rcu_read_lock();
 383     for (;;) {
 384         AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 385         section = address_space_translate_internal(d, addr, &addr, plen, true);
 386         mr = section->mr;
 387
 388         if (!mr->iommu_ops) {
 389             break;
 390         }
 391
 392         iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 393         addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 394                 | (addr & iotlb.addr_mask));
 395         len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
 396         if (!(iotlb.perm & (1 << is_write))) {
 397             mr = &io_mem_unassigned;
 398             break;
 399         }
 400
 401         as = iotlb.target_as;
 402     }
 403
 404     if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 405         hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 406         len = MIN(page, len);
 407     }
 408
 409     *plen = len;
 410     *xlat = addr;
 411     rcu_read_unlock();
 412     return mr;
 413 }
 414
 415 /* Called from RCU critical section */
 416 MemoryRegionSection *
 417 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
 418                                   hwaddr *xlat, hwaddr *plen)
 419 {
 420     MemoryRegionSection *section;
 421     section = address_space_translate_internal(cpu->memory_dispatch,
 422                                                addr, xlat, plen, false);
 423
 424     assert(!section->mr->iommu_ops);
 425     return section;
 426 }
 427 #endif
 428
 /* One-time global setup.  In system-emulation builds this initialises the
  * ram_list mutex and then calls memory_map_init() and io_mem_init(); in
  * user-only builds it does nothing.
  */
 429 void cpu_exec_init_all(void)
 430 {
 431 #if !defined(CONFIG_USER_ONLY)
 432     qemu_mutex_init(&ram_list.mutex);
 433     memory_map_init();
 434     io_mem_init();
 435 #endif
 436 }
 437
 438 #if !defined(CONFIG_USER_ONLY)
 439
 440 static int cpu_common_post_load(void *opaque, int version_id)
 441 {
 442     CPUState *cpu = opaque;
 443
 444     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 445        version_id is increased. */
 446     cpu->interrupt_request &= ~0x01;
 447     tlb_flush(cpu, 1);
 448
 449     return 0;
 450 }
 451
 452 static int cpu_common_pre_load(void *opaque)
 453 {
 454     CPUState *cpu = opaque;
 455
 456     cpu->exception_index = -1;
 457
 458     return 0;
 459 }
 460
 461 static bool cpu_common_exception_index_needed(void *opaque)
 462 {
 463     CPUState *cpu = opaque;
 464
 465     return tcg_enabled() && cpu->exception_index != -1;
 466 }
 467
 /* Optional subsection of vmstate_cpu_common carrying CPUState.exception_index;
  * emitted only when cpu_common_exception_index_needed() returns true.
  */
 468 static const VMStateDescription vmstate_cpu_common_exception_index = {
 469     .name = "cpu_common/exception_index",
 470     .version_id = 1,
 471     .minimum_version_id = 1,
 472     .fields = (VMStateField[]) {
 473         VMSTATE_INT32(exception_index, CPUState),
 474         VMSTATE_END_OF_LIST()
 475     }
 476 };
 477
 /* Migration description of the target-independent CPU state: the halted and
  * interrupt_request fields, plus the optional exception_index subsection.
  * cpu_exec_init() registers it for CPUs whose device has no vmsd of its own.
  */
 478 const VMStateDescription vmstate_cpu_common = {
 479     .name = "cpu_common",
 480     .version_id = 1,
 481     .minimum_version_id = 1,
 482     .pre_load = cpu_common_pre_load,
 483     .post_load = cpu_common_post_load,
 484     .fields = (VMStateField[]) {
 485         VMSTATE_UINT32(halted, CPUState),
 486         VMSTATE_UINT32(interrupt_request, CPUState),
 487         VMSTATE_END_OF_LIST()
 488     },
 489     .subsections = (VMStateSubsection[]) {
 490         {
 491             .vmsd = &vmstate_cpu_common_exception_index,
 492             .needed = cpu_common_exception_index_needed,
 493         } , {
 494             /* empty */
                  /* (all-zero entry terminating the subsection list) */
 495         }
 496     }
 497 };
 498
 499 #endif
 500
 501 CPUState *qemu_get_cpu(int index)
 502 {
 503     CPUState *cpu;
 504
 505     CPU_FOREACH(cpu) {
 506         if (cpu->cpu_index == index) {
 507             return cpu;
 508         }
 509     }
 510
 511     return NULL;
 512 }
 513
 514 #if !defined(CONFIG_USER_ONLY)
 515 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
 516 {
 517     /* We only support one address space per cpu at the moment.  */
 518     assert(cpu->as == as);
 519
 520     if (cpu->tcg_as_listener) {
 521         memory_listener_unregister(cpu->tcg_as_listener);
 522     } else {
 523         cpu->tcg_as_listener = g_new0(MemoryListener, 1);
 524     }
 525     cpu->tcg_as_listener->commit = tcg_commit;
 526     memory_listener_register(cpu->tcg_as_listener, as);
 527 }
 528 #endif
 529
 /* Register the CPU that owns @env with the global CPU list.
  *
  * The CPU is given the next free index, its breakpoint and watchpoint lists
  * are initialised, it is appended to 'cpus', and its migration state is
  * registered.
  */
 530 void cpu_exec_init(CPUArchState *env)
 531 {
 532     CPUState *cpu = ENV_GET_CPU(env);
 533     CPUClass *cc = CPU_GET_CLASS(cpu);
 534     CPUState *some_cpu;
 535     int cpu_index;
 536
      /* User-only builds bracket the list walk and insertion with the CPU list lock. */
 537 #if defined(CONFIG_USER_ONLY)
 538     cpu_list_lock();
 539 #endif
          /* The new index is the number of CPUs already on the list. */
 540     cpu_index = 0;
 541     CPU_FOREACH(some_cpu) {
 542         cpu_index++;
 543     }
 544     cpu->cpu_index = cpu_index;
 545     cpu->numa_node = 0;
 546     QTAILQ_INIT(&cpu->breakpoints);
 547     QTAILQ_INIT(&cpu->watchpoints);
 548 #ifndef CONFIG_USER_ONLY
 549     cpu->as = &address_space_memory;
 550     cpu->thread_id = qemu_get_thread_id();
 551 #endif
 552     QTAILQ_INSERT_TAIL(&cpus, cpu, node);
 553 #if defined(CONFIG_USER_ONLY)
 554     cpu_list_unlock();
 555 #endif
          /* Register the common CPU state only when the device has no vmsd of its own. */
 556     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 557         vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
 558     }
      /* Targets that define CPU_SAVE_VERSION use cpu_save/cpu_load handlers and
       * must not also provide a vmsd (asserted below).
       */
 559 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 560     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
 561                     cpu_save, cpu_load, env);
 562     assert(cc->vmsd == NULL);
 563     assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
 564 #endif
 565     if (cc->vmsd != NULL) {
 566         vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
 567     }
 568 }
 569
 570 #if defined(CONFIG_USER_ONLY)
 /* User-only variant: invalidate the translated code covering the single
  * byte at @pc.  @cpu is unused.
  */
 571 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 572 {
 573     tb_invalidate_phys_page_range(pc, pc + 1, 0);
 574 }
 575 #else
 576 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 577 {
 578     hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
 579     if (phys != -1) {
 580         tb_invalidate_phys_addr(cpu->as,
 581                                 phys | (pc & ~TARGET_PAGE_MASK));
 582     }
 583 }
 584 #endif
 585
 586 #if defined(CONFIG_USER_ONLY)
 /* User-only stub: this build keeps no watchpoints, so there is nothing to remove. */
 587 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 588
 589 {
 590 }
 591
 /* User-only stub: always fails with -ENOSYS. */
 592 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 593                           int flags)
 594 {
 595     return -ENOSYS;
 596 }
 597
 /* User-only stub: does nothing. */
 598 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 599 {
 600 }
 601
 /* User-only stub: always fails with -ENOSYS and leaves *watchpoint untouched. */
 602 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 603                           int flags, CPUWatchpoint **watchpoint)
 604 {
 605     return -ENOSYS;
 606 }
 607 #else
 608 /* Add a watchpoint.  */
 609 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 610                           int flags, CPUWatchpoint **watchpoint)
 611 {
 612     CPUWatchpoint *wp;
 613
 614     /* forbid ranges which are empty or run off the end of the address space */
 615     if (len == 0 || (addr + len - 1) < addr) {
 616         error_report("tried to set invalid watchpoint at %"
 617                      VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 618         return -EINVAL;
 619     }
 620     wp = g_malloc(sizeof(*wp));
 621
 622     wp->vaddr = addr;
 623     wp->len = len;
 624     wp->flags = flags;
 625
 626     /* keep all GDB-injected watchpoints in front */
 627     if (flags & BP_GDB) {
 628         QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 629     } else {
 630         QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 631     }
 632
 633     tlb_flush_page(cpu, addr);
 634
 635     if (watchpoint)
 636         *watchpoint = wp;
 637     return 0;
 638 }
 639
 640 /* Remove a specific watchpoint.  */
 641 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 642                           int flags)
 643 {
 644     CPUWatchpoint *wp;
 645
 646     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 647         if (addr == wp->vaddr && len == wp->len
 648                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 649             cpu_watchpoint_remove_by_ref(cpu, wp);
 650             return 0;
 651         }
 652     }
 653     return -ENOENT;
 654 }
 655
 656 /* Remove a specific watchpoint by reference.  */
 657 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 658 {
 659     QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 660
 661     tlb_flush_page(cpu, watchpoint->vaddr);
 662
 663     g_free(watchpoint);
 664 }
 665
 666 /* Remove all matching watchpoints.  */
 667 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 668 {
 669     CPUWatchpoint *wp, *next;
 670
 671     QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 672         if (wp->flags & mask) {
 673             cpu_watchpoint_remove_by_ref(cpu, wp);
 674         }
 675     }
 676 }
 677
 678 /* Return true if this watchpoint address matches the specified
 679  * access (ie the address range covered by the watchpoint overlaps
 680  * partially or completely with the address range covered by the
 681  * access).
 682  */
 683 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 684                                                   vaddr addr,
 685                                                   vaddr len)
 686 {
 687     /* We know the lengths are non-zero, but a little caution is
 688      * required to avoid errors in the case where the range ends
 689      * exactly at the top of the address space and so addr + len
 690      * wraps round to zero.
 691      */
 692     vaddr wpend = wp->vaddr + wp->len - 1;
 693     vaddr addrend = addr + len - 1;
 694
 695     return !(addr > wpend || wp->vaddr > addrend);
 696 }
 697
 698 #endif
 699
 700 /* Add a breakpoint.  */
 701 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 702                           CPUBreakpoint **breakpoint)
 703 {
 704     CPUBreakpoint *bp;
 705
 706     bp = g_malloc(sizeof(*bp));
 707
 708     bp->pc = pc;
 709     bp->flags = flags;
 710
 711     /* keep all GDB-injected breakpoints in front */
 712     if (flags & BP_GDB) {
 713         QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 714     } else {
 715         QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 716     }
 717
 718     breakpoint_invalidate(cpu, pc);
 719
 720     if (breakpoint) {
 721         *breakpoint = bp;
 722     }
 723     return 0;
 724 }
 725
 726 /* Remove a specific breakpoint.  */
 727 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 728 {
 729     CPUBreakpoint *bp;
 730
 731     QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 732         if (bp->pc == pc && bp->flags == flags) {
 733             cpu_breakpoint_remove_by_ref(cpu, bp);
 734             return 0;
 735         }
 736     }
 737     return -ENOENT;
 738 }
 739
 740 /* Remove a specific breakpoint by reference.  */
 741 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 742 {
 743     QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 744
 745     breakpoint_invalidate(cpu, breakpoint->pc);
 746
 747     g_free(breakpoint);
 748 }
 749
 750 /* Remove all matching breakpoints. */
 751 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 752 {
 753     CPUBreakpoint *bp, *next;
 754
 755     QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 756         if (bp->flags & mask) {
 757             cpu_breakpoint_remove_by_ref(cpu, bp);
 758         }
 759     }
 760 }
 761
 762 /* enable or disable single step mode. EXCP_DEBUG is returned by the
 763    CPU loop after each instruction */
 764 void cpu_single_step(CPUState *cpu, int enabled)
 765 {
 766     if (cpu->singlestep_enabled != enabled) {
 767         cpu->singlestep_enabled = enabled;
 768         if (kvm_enabled()) {
 769             kvm_update_guest_debug(cpu, 0);
 770         } else {
 771             /* must flush all the translated code to avoid inconsistencies */
 772             /* XXX: only flush what is necessary */
 773             CPUArchState *env = cpu->env_ptr;
 774             tb_flush(env);
 775         }
 776     }
 777 }
 778
 779 void cpu_abort(CPUState *cpu, const char *fmt, ...)
 780 {
 781     va_list ap;
 782     va_list ap2;
 783
 784     va_start(ap, fmt);
 785     va_copy(ap2, ap);
 786     fprintf(stderr, "qemu: fatal: ");
 787     vfprintf(stderr, fmt, ap);
 788     fprintf(stderr, "\n");
 789     cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 790     if (qemu_log_enabled()) {
 791         qemu_log("qemu: fatal: ");
 792         qemu_log_vprintf(fmt, ap2);
 793         qemu_log("\n");
 794         log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 795         qemu_log_flush();
 796         qemu_log_close();
 797     }
 798     va_end(ap2);
 799     va_end(ap);
 800 #if defined(CONFIG_USER_ONLY)
 801     {
 802         struct sigaction act;
 803         sigfillset(&act.sa_mask);
 804         act.sa_handler = SIG_DFL;
 805         sigaction(SIGABRT, &act, NULL);
 806     }
 807 #endif
 808     abort();
 809 }
 810
 811 #if !defined(CONFIG_USER_ONLY)
 812 /* Called from RCU critical section */
 /* Return the RAMBlock whose [offset, offset + max_length) range contains
  * @addr.  The most recently used block is tried first, then the whole block
  * list.  If no block matches, an error is printed and the process aborts.
  */
 813 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 814 {
 815     RAMBlock *block;
 816
 817     block = atomic_rcu_read(&ram_list.mru_block);
          /* One comparison for both bounds: assumes ram_addr_t is unsigned, so
           * that addr < offset wraps to a huge value - TODO confirm.
           */
 818     if (block && addr - block->offset < block->max_length) {
 819         goto found;
 820     }
 821     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 822         if (addr - block->offset < block->max_length) {
 823             goto found;
 824         }
 825     }
 826
 827     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
 828     abort();
 829
 830 found:
 831     /* It is safe to write mru_block outside the iothread lock.  This
 832      * is what happens:
 833      *
 834      *     mru_block = xxx
 835      *     rcu_read_unlock()
 836      *                                        xxx removed from list
 837      *                  rcu_read_lock()
 838      *                  read mru_block
 839      *                                        mru_block = NULL;
 840      *                                        call_rcu(reclaim_ramblock, xxx);
 841      *                  rcu_read_unlock()
 842      *
 843      * atomic_rcu_set is not needed here.  The block was already published
 844      * when it was placed into the list.  Here we're just making an extra
 845      * copy of the pointer.
 846      */
 847     ram_list.mru_block = block;
 848     return block;
 849 }
 850
 851 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 852 {
 853     ram_addr_t start1;
 854     RAMBlock *block;
 855     ram_addr_t end;
 856
 857     end = TARGET_PAGE_ALIGN(start + length);
 858     start &= TARGET_PAGE_MASK;
 859
 860     rcu_read_lock();
 861     block = qemu_get_ram_block(start);
 862     assert(block == qemu_get_ram_block(end - 1));
 863     start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
 864     cpu_tlb_reset_dirty_all(start1, length);
 865     rcu_read_unlock();
 866 }
 867
 868 /* Note: start and end must be within the same ram block.  */
 869 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
 870                                      unsigned client)
 871 {
 872     if (length == 0)
 873         return;
 874     cpu_physical_memory_clear_dirty_range_type(start, length, client);
 875
 876     if (tcg_enabled()) {
 877         tlb_reset_dirty_range_all(start, length);
 878     }
 879 }
 880
 /* Record in the file-scope flag in_migration whether dirty tracking is enabled. */
 881 static void cpu_physical_memory_set_dirty_tracking(bool enable)
 882 {
 883     in_migration = enable;
 884 }
 885
 886 /* Called from RCU critical section */
 887 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
 888                                        MemoryRegionSection *section,
 889                                        target_ulong vaddr,
 890                                        hwaddr paddr, hwaddr xlat,
 891                                        int prot,
 892                                        target_ulong *address)
 893 {
 894     hwaddr iotlb;
 895     CPUWatchpoint *wp;
 896
 897     if (memory_region_is_ram(section->mr)) {
 898         /* Normal RAM.  */
 899         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
 900             + xlat;
 901         if (!section->readonly) {
 902             iotlb |= PHYS_SECTION_NOTDIRTY;
 903         } else {
 904             iotlb |= PHYS_SECTION_ROM;
 905         }
 906     } else {
 907         iotlb = section - section->address_space->dispatch->map.sections;
 908         iotlb += xlat;
 909     }
 910
 911     /* Make accesses to pages with watchpoints go via the
 912        watchpoint trap routines.  */
 913     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 914         if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
 915             /* Avoid trapping reads of pages with a write breakpoint. */
 916             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
 917                 iotlb = PHYS_SECTION_WATCH + paddr;
 918                 *address |= TLB_MMIO;
 919                 break;
 920             }
 921         }
 922     }
 923
 924     return iotlb;
 925 }
 926 #endif /* !defined(CONFIG_USER_ONLY) */
 927
 928 #if !defined(CONFIG_USER_ONLY)
 929
 /* Subpage helpers, defined later in the file; register_subpage() uses both. */
 930 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
 931                              uint16_t section);
 932 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
 933
      /* Allocator hook; defaults to qemu_anon_ram_alloc and can be replaced
       * through phys_mem_set_alloc().
       */
 934 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
 935                                qemu_anon_ram_alloc;
 936
 937 /*
 938  * Set a custom physical guest memory allocator.
 939  * Accelerators with unusual needs may need this.  Hopefully, we can
 940  * get rid of it eventually.
      *
      * @alloc replaces the function stored in phys_mem_alloc; it is not
      * checked for NULL here.
 941  */
 942 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
 943 {
 944     phys_mem_alloc = alloc;
 945 }
 946
 947 static uint16_t phys_section_add(PhysPageMap *map,
 948                                  MemoryRegionSection *section)
 949 {
 950     /* The physical section number is ORed with a page-aligned
 951      * pointer to produce the iotlb entries.  Thus it should
 952      * never overflow into the page-aligned value.
 953      */
 954     assert(map->sections_nb < TARGET_PAGE_SIZE);
 955
 956     if (map->sections_nb == map->sections_nb_alloc) {
 957         map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
 958         map->sections = g_renew(MemoryRegionSection, map->sections,
 959                                 map->sections_nb_alloc);
 960     }
 961     map->sections[map->sections_nb] = *section;
 962     memory_region_ref(section->mr);
 963     return map->sections_nb++;
 964 }
 965
 966 static void phys_section_destroy(MemoryRegion *mr)
 967 {
 968     memory_region_unref(mr);
 969
 970     if (mr->subpage) {
 971         subpage_t *subpage = container_of(mr, subpage_t, iomem);
 972         object_unref(OBJECT(&subpage->iomem));
 973         g_free(subpage);
 974     }
 975 }
 976
 977 static void phys_sections_free(PhysPageMap *map)
 978 {
 979     while (map->sections_nb > 0) {
 980         MemoryRegionSection *section = &map->sections[--map->sections_nb];
 981         phys_section_destroy(section->mr);
 982     }
 983     g_free(map->sections);
 984     g_free(map->nodes);
 985 }
 986
 987 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
 988 {
 989     subpage_t *subpage;
 990     hwaddr base = section->offset_within_address_space
 991         & TARGET_PAGE_MASK;
 992     MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
 993                                                    d->map.nodes, d->map.sections);
 994     MemoryRegionSection subsection = {
 995         .offset_within_address_space = base,
 996         .size = int128_make64(TARGET_PAGE_SIZE),
 997     };
 998     hwaddr start, end;
 999
1000     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1001
1002     if (!(existing->mr->subpage)) {
1003         subpage = subpage_init(d->as, base);
1004         subsection.address_space = d->as;
1005         subsection.mr = &subpage->iomem;
1006         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1007                       phys_section_add(&d->map, &subsection));
1008     } else {
1009         subpage = container_of(existing->mr, subpage_t, iomem);
1010     }
1011     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1012     end = start + int128_get64(section->size) - 1;
1013     subpage_register(subpage, start, end,
1014                      phys_section_add(&d->map, section));
1015 }
1016
1017
1018 static void register_multipage(AddressSpaceDispatch *d,
1019                                MemoryRegionSection *section)
1020 {
1021     hwaddr start_addr = section->offset_within_address_space;
1022     uint16_t section_index = phys_section_add(&d->map, section);
1023     uint64_t num_pages = int128_get64(int128_rshift(section->size,
1024                                                     TARGET_PAGE_BITS));
1025
1026     assert(num_pages);
1027     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1028 }
1029
1030 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1031 {
1032     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1033     AddressSpaceDispatch *d = as->next_dispatch;
1034     MemoryRegionSection now = *section, remain = *section;
1035     Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1036
1037     if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1038         uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1039                        - now.offset_within_address_space;
1040
1041         now.size = int128_min(int128_make64(left), now.size);
1042         register_subpage(d, &now);
1043     } else {
1044         now.size = int128_zero();
1045     }
1046     while (int128_ne(remain.size, now.size)) {
1047         remain.size = int128_sub(remain.size, now.size);
1048         remain.offset_within_address_space += int128_get64(now.size);
1049         remain.offset_within_region += int128_get64(now.size);
1050         now = remain;
1051         if (int128_lt(remain.size, page_size)) {
1052             register_subpage(d, &now);
1053         } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1054             now.size = page_size;
1055             register_subpage(d, &now);
1056         } else {
1057             now.size = int128_and(now.size, int128_neg(page_size));
1058             register_multipage(d, &now);
1059         }
1060     }
1061 }
1062
/* Flush the KVM coalesced MMIO buffer; does nothing when KVM is not
 * enabled.
 */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1068
1069 void qemu_mutex_lock_ramlist(void)
1070 {
1071     qemu_mutex_lock(&ram_list.mutex);
1072 }
1073
1074 void qemu_mutex_unlock_ramlist(void)
1075 {
1076     qemu_mutex_unlock(&ram_list.mutex);
1077 }
1078
1079 #ifdef __linux__
1080
1081 #include <sys/vfs.h>
1082
1083 #define HUGETLBFS_MAGIC       0x958458f6
1084
1085 static long gethugepagesize(const char *path, Error **errp)
1086 {
1087     struct statfs fs;
1088     int ret;
1089
1090     do {
1091         ret = statfs(path, &fs);
1092     } while (ret != 0 && errno == EINTR);
1093
1094     if (ret != 0) {
1095         error_setg_errno(errp, errno, "failed to get page size of file %s",
1096                          path);
1097         return 0;
1098     }
1099
1100     if (fs.f_type != HUGETLBFS_MAGIC)
1101         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1102
1103     return fs.f_bsize;
1104 }
1105
1106 static void *file_ram_alloc(RAMBlock *block,
1107                             ram_addr_t memory,
1108                             const char *path,
1109                             Error **errp)
1110 {
1111     char *filename;
1112     char *sanitized_name;
1113     char *c;
1114     void *area = NULL;
1115     int fd;
1116     uint64_t hpagesize;
1117     Error *local_err = NULL;
1118
1119     hpagesize = gethugepagesize(path, &local_err);
1120     if (local_err) {
1121         error_propagate(errp, local_err);
1122         goto error;
1123     }
1124     block->mr->align = hpagesize;
1125
1126     if (memory < hpagesize) {
1127         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1128                    "or larger than huge page size 0x%" PRIx64,
1129                    memory, hpagesize);
1130         goto error;
1131     }
1132
1133     if (kvm_enabled() && !kvm_has_sync_mmu()) {
1134         error_setg(errp,
1135                    "host lacks kvm mmu notifiers, -mem-path unsupported");
1136         goto error;
1137     }
1138
1139     /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1140     sanitized_name = g_strdup(memory_region_name(block->mr));
1141     for (c = sanitized_name; *c != '\0'; c++) {
1142         if (*c == '/')
1143             *c = '_';
1144     }
1145
1146     filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1147                                sanitized_name);
1148     g_free(sanitized_name);
1149
1150     fd = mkstemp(filename);
1151     if (fd < 0) {
1152         error_setg_errno(errp, errno,
1153                          "unable to create backing store for hugepages");
1154         g_free(filename);
1155         goto error;
1156     }
1157     unlink(filename);
1158     g_free(filename);
1159
1160     memory = (memory+hpagesize-1) & ~(hpagesize-1);
1161
1162     /*
1163      * ftruncate is not supported by hugetlbfs in older
1164      * hosts, so don't bother bailing out on errors.
1165      * If anything goes wrong with it under other filesystems,
1166      * mmap will fail.
1167      */
1168     if (ftruncate(fd, memory)) {
1169         perror("ftruncate");
1170     }
1171
1172     area = mmap(0, memory, PROT_READ | PROT_WRITE,
1173                 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1174                 fd, 0);
1175     if (area == MAP_FAILED) {
1176         error_setg_errno(errp, errno,
1177                          "unable to map backing store for hugepages");
1178         close(fd);
1179         goto error;
1180     }
1181
1182     if (mem_prealloc) {
1183         os_mem_prealloc(fd, area, memory);
1184     }
1185
1186     block->fd = fd;
1187     return area;
1188
1189 error:
1190     if (mem_prealloc) {
1191         error_report("%s\n", error_get_pretty(*errp));
1192         exit(1);
1193     }
1194     return NULL;
1195 }
1196 #endif
1197
1198 /* Called with the ramlist lock held.  */
1199 static ram_addr_t find_ram_offset(ram_addr_t size)
1200 {
1201     RAMBlock *block, *next_block;
1202     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1203
1204     assert(size != 0); /* it would hand out same offset multiple times */
1205
1206     if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1207         return 0;
1208     }
1209
1210     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1211         ram_addr_t end, next = RAM_ADDR_MAX;
1212
1213         end = block->offset + block->max_length;
1214
1215         QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1216             if (next_block->offset >= end) {
1217                 next = MIN(next, next_block->offset);
1218             }
1219         }
1220         if (next - end >= size && next - end < mingap) {
1221             offset = end;
1222             mingap = next - end;
1223         }
1224     }
1225
1226     if (offset == RAM_ADDR_MAX) {
1227         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1228                 (uint64_t)size);
1229         abort();
1230     }
1231
1232     return offset;
1233 }
1234
1235 ram_addr_t last_ram_offset(void)
1236 {
1237     RAMBlock *block;
1238     ram_addr_t last = 0;
1239
1240     rcu_read_lock();
1241     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1242         last = MAX(last, block->offset + block->max_length);
1243     }
1244     rcu_read_unlock();
1245     return last;
1246 }
1247
1248 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1249 {
1250     int ret;
1251
1252     /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1253     if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1254                            "dump-guest-core", true)) {
1255         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1256         if (ret) {
1257             perror("qemu_madvise");
1258             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1259                             "but dump_guest_core=off specified\n");
1260         }
1261     }
1262 }
1263
1264 /* Called within an RCU critical section, or while the ramlist lock
1265  * is held.
1266  */
1267 static RAMBlock *find_ram_block(ram_addr_t addr)
1268 {
1269     RAMBlock *block;
1270
1271     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1272         if (block->offset == addr) {
1273             return block;
1274         }
1275     }
1276
1277     return NULL;
1278 }
1279
1280 /* Called with iothread lock held.  */
1281 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1282 {
1283     RAMBlock *new_block, *block;
1284
1285     rcu_read_lock();
1286     new_block = find_ram_block(addr);
1287     assert(new_block);
1288     assert(!new_block->idstr[0]);
1289
1290     if (dev) {
1291         char *id = qdev_get_dev_path(dev);
1292         if (id) {
1293             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1294             g_free(id);
1295         }
1296     }
1297     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1298
1299     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1300         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1301             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1302                     new_block->idstr);
1303             abort();
1304         }
1305     }
1306     rcu_read_unlock();
1307 }
1308
1309 /* Called with iothread lock held.  */
1310 void qemu_ram_unset_idstr(ram_addr_t addr)
1311 {
1312     RAMBlock *block;
1313
1314     /* FIXME: arch_init.c assumes that this is not called throughout
1315      * migration.  Ignore the problem since hot-unplug during migration
1316      * does not work anyway.
1317      */
1318
1319     rcu_read_lock();
1320     block = find_ram_block(addr);
1321     if (block) {
1322         memset(block->idstr, 0, sizeof(block->idstr));
1323     }
1324     rcu_read_unlock();
1325 }
1326
1327 static int memory_try_enable_merging(void *addr, size_t len)
1328 {
1329     if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1330         /* disabled by the user */
1331         return 0;
1332     }
1333
1334     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1335 }
1336
1337 /* Only legal before guest might have detected the memory size: e.g. on
1338  * incoming migration, or right after reset.
1339  *
1340  * As memory core doesn't know how is memory accessed, it is up to
1341  * resize callback to update device state and/or add assertions to detect
1342  * misuse, if necessary.
1343  */
1344 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1345 {
1346     RAMBlock *block = find_ram_block(base);
1347
1348     assert(block);
1349
1350     if (block->used_length == newsize) {
1351         return 0;
1352     }
1353
1354     if (!(block->flags & RAM_RESIZEABLE)) {
1355         error_setg_errno(errp, EINVAL,
1356                          "Length mismatch: %s: 0x" RAM_ADDR_FMT
1357                          " in != 0x" RAM_ADDR_FMT, block->idstr,
1358                          newsize, block->used_length);
1359         return -EINVAL;
1360     }
1361
1362     if (block->max_length < newsize) {
1363         error_setg_errno(errp, EINVAL,
1364                          "Length too large: %s: 0x" RAM_ADDR_FMT
1365                          " > 0x" RAM_ADDR_FMT, block->idstr,
1366                          newsize, block->max_length);
1367         return -EINVAL;
1368     }
1369
1370     cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1371     block->used_length = newsize;
1372     cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1373     memory_region_set_size(block->mr, newsize);
1374     if (block->resized) {
1375         block->resized(block->idstr, newsize, block->host);
1376     }
1377     return 0;
1378 }
1379
1380 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1381 {
1382     RAMBlock *block;
1383     RAMBlock *last_block = NULL;
1384     ram_addr_t old_ram_size, new_ram_size;
1385
1386     old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1387
1388     qemu_mutex_lock_ramlist();
1389     new_block->offset = find_ram_offset(new_block->max_length);
1390
1391     if (!new_block->host) {
1392         if (xen_enabled()) {
1393             xen_ram_alloc(new_block->offset, new_block->max_length,
1394                           new_block->mr);
1395         } else {
1396             new_block->host = phys_mem_alloc(new_block->max_length,
1397                                              &new_block->mr->align);
1398             if (!new_block->host) {
1399                 error_setg_errno(errp, errno,
1400                                  "cannot set up guest memory '%s'",
1401                                  memory_region_name(new_block->mr));
1402                 qemu_mutex_unlock_ramlist();
1403                 return -1;
1404             }
1405             memory_try_enable_merging(new_block->host, new_block->max_length);
1406         }
1407     }
1408
1409     /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1410      * QLIST (which has an RCU-friendly variant) does not have insertion at
1411      * tail, so save the last element in last_block.
1412      */
1413     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1414         last_block = block;
1415         if (block->max_length < new_block->max_length) {
1416             break;
1417         }
1418     }
1419     if (block) {
1420         QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1421     } else if (last_block) {
1422         QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1423     } else { /* list is empty */
1424         QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1425     }
1426     ram_list.mru_block = NULL;
1427
1428     /* Write list before version */
1429     smp_wmb();
1430     ram_list.version++;
1431     qemu_mutex_unlock_ramlist();
1432
1433     new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1434
1435     if (new_ram_size > old_ram_size) {
1436         int i;
1437
1438         /* ram_list.dirty_memory[] is protected by the iothread lock.  */
1439         for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1440             ram_list.dirty_memory[i] =
1441                 bitmap_zero_extend(ram_list.dirty_memory[i],
1442                                    old_ram_size, new_ram_size);
1443        }
1444     }
1445     cpu_physical_memory_set_dirty_range(new_block->offset,
1446                                         new_block->used_length);
1447
1448     if (new_block->host) {
1449         qemu_ram_setup_dump(new_block->host, new_block->max_length);
1450         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1451         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1452         if (kvm_enabled()) {
1453             kvm_setup_guest_memory(new_block->host, new_block->max_length);
1454         }
1455     }
1456
1457     return new_block->offset;
1458 }
1459
1460 #ifdef __linux__
1461 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1462                                     bool share, const char *mem_path,
1463                                     Error **errp)
1464 {
1465     RAMBlock *new_block;
1466     ram_addr_t addr;
1467     Error *local_err = NULL;
1468
1469     if (xen_enabled()) {
1470         error_setg(errp, "-mem-path not supported with Xen");
1471         return -1;
1472     }
1473
1474     if (phys_mem_alloc != qemu_anon_ram_alloc) {
1475         /*
1476          * file_ram_alloc() needs to allocate just like
1477          * phys_mem_alloc, but we haven't bothered to provide
1478          * a hook there.
1479          */
1480         error_setg(errp,
1481                    "-mem-path not supported with this accelerator");
1482         return -1;
1483     }
1484
1485     size = TARGET_PAGE_ALIGN(size);
1486     new_block = g_malloc0(sizeof(*new_block));
1487     new_block->mr = mr;
1488     new_block->used_length = size;
1489     new_block->max_length = size;
1490     new_block->flags = share ? RAM_SHARED : 0;
1491     new_block->host = file_ram_alloc(new_block, size,
1492                                      mem_path, errp);
1493     if (!new_block->host) {
1494         g_free(new_block);
1495         return -1;
1496     }
1497
1498     addr = ram_block_add(new_block, &local_err);
1499     if (local_err) {
1500         g_free(new_block);
1501         error_propagate(errp, local_err);
1502         return -1;
1503     }
1504     return addr;
1505 }
1506 #endif
1507
1508 static
1509 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1510                                    void (*resized)(const char*,
1511                                                    uint64_t length,
1512                                                    void *host),
1513                                    void *host, bool resizeable,
1514                                    MemoryRegion *mr, Error **errp)
1515 {
1516     RAMBlock *new_block;
1517     ram_addr_t addr;
1518     Error *local_err = NULL;
1519
1520     size = TARGET_PAGE_ALIGN(size);
1521     max_size = TARGET_PAGE_ALIGN(max_size);
1522     new_block = g_malloc0(sizeof(*new_block));
1523     new_block->mr = mr;
1524     new_block->resized = resized;
1525     new_block->used_length = size;
1526     new_block->max_length = max_size;
1527     assert(max_size >= size);
1528     new_block->fd = -1;
1529     new_block->host = host;
1530     if (host) {
1531         new_block->flags |= RAM_PREALLOC;
1532     }
1533     if (resizeable) {
1534         new_block->flags |= RAM_RESIZEABLE;
1535     }
1536     addr = ram_block_add(new_block, &local_err);
1537     if (local_err) {
1538         g_free(new_block);
1539         error_propagate(errp, local_err);
1540         return -1;
1541     }
1542     return addr;
1543 }
1544
1545 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1546                                    MemoryRegion *mr, Error **errp)
1547 {
1548     return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1549 }
1550
1551 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1552 {
1553     return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1554 }
1555
1556 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1557                                      void (*resized)(const char*,
1558                                                      uint64_t length,
1559                                                      void *host),
1560                                      MemoryRegion *mr, Error **errp)
1561 {
1562     return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1563 }
1564
1565 void qemu_ram_free_from_ptr(ram_addr_t addr)
1566 {
1567     RAMBlock *block;
1568
1569     qemu_mutex_lock_ramlist();
1570     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1571         if (addr == block->offset) {
1572             QLIST_REMOVE_RCU(block, next);
1573             ram_list.mru_block = NULL;
1574             /* Write list before version */
1575             smp_wmb();
1576             ram_list.version++;
1577             g_free_rcu(block, rcu);
1578             break;
1579         }
1580     }
1581     qemu_mutex_unlock_ramlist();
1582 }
1583
1584 static void reclaim_ramblock(RAMBlock *block)
1585 {
1586     if (block->flags & RAM_PREALLOC) {
1587         ;
1588     } else if (xen_enabled()) {
1589         xen_invalidate_map_cache_entry(block->host);
1590 #ifndef _WIN32
1591     } else if (block->fd >= 0) {
1592         munmap(block->host, block->max_length);
1593         close(block->fd);
1594 #endif
1595     } else {
1596         qemu_anon_ram_free(block->host, block->max_length);
1597     }
1598     g_free(block);
1599 }
1600
1601 void qemu_ram_free(ram_addr_t addr)
1602 {
1603     RAMBlock *block;
1604
1605     qemu_mutex_lock_ramlist();
1606     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1607         if (addr == block->offset) {
1608             QLIST_REMOVE_RCU(block, next);
1609             ram_list.mru_block = NULL;
1610             /* Write list before version */
1611             smp_wmb();
1612             ram_list.version++;
1613             call_rcu(block, reclaim_ramblock, rcu);
1614             break;
1615         }
1616     }
1617     qemu_mutex_unlock_ramlist();
1618 }
1619
1620 #ifndef _WIN32
1621 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1622 {
1623     RAMBlock *block;
1624     ram_addr_t offset;
1625     int flags;
1626     void *area, *vaddr;
1627
1628     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1629         offset = addr - block->offset;
1630         if (offset < block->max_length) {
1631             vaddr = ramblock_ptr(block, offset);
1632             if (block->flags & RAM_PREALLOC) {
1633                 ;
1634             } else if (xen_enabled()) {
1635                 abort();
1636             } else {
1637                 flags = MAP_FIXED;
1638                 munmap(vaddr, length);
1639                 if (block->fd >= 0) {
1640                     flags |= (block->flags & RAM_SHARED ?
1641                               MAP_SHARED : MAP_PRIVATE);
1642                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1643                                 flags, block->fd, offset);
1644                 } else {
1645                     /*
1646                      * Remap needs to match alloc.  Accelerators that
1647                      * set phys_mem_alloc never remap.  If they did,
1648                      * we'd need a remap hook here.
1649                      */
1650                     assert(phys_mem_alloc == qemu_anon_ram_alloc);
1651
1652                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1653                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1654                                 flags, -1, 0);
1655                 }
1656                 if (area != vaddr) {
1657                     fprintf(stderr, "Could not remap addr: "
1658                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1659                             length, addr);
1660                     exit(1);
1661                 }
1662                 memory_try_enable_merging(vaddr, length);
1663                 qemu_ram_setup_dump(vaddr, length);
1664             }
1665         }
1666     }
1667 }
1668 #endif /* !_WIN32 */
1669
1670 int qemu_get_ram_fd(ram_addr_t addr)
1671 {
1672     RAMBlock *block;
1673     int fd;
1674
1675     rcu_read_lock();
1676     block = qemu_get_ram_block(addr);
1677     fd = block->fd;
1678     rcu_read_unlock();
1679     return fd;
1680 }
1681
1682 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1683 {
1684     RAMBlock *block;
1685     void *ptr;
1686
1687     rcu_read_lock();
1688     block = qemu_get_ram_block(addr);
1689     ptr = ramblock_ptr(block, 0);
1690     rcu_read_unlock();
1691     return ptr;
1692 }
1693
1694 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1695  * This should not be used for general purpose DMA.  Use address_space_map
1696  * or address_space_rw instead. For local memory (e.g. video ram) that the
1697  * device owns, use memory_region_get_ram_ptr.
1698  *
1699  * By the time this function returns, the returned pointer is not protected
1700  * by RCU anymore.  If the caller is not within an RCU critical section and
1701  * does not hold the iothread lock, it must have other means of protecting the
1702  * pointer, such as a reference to the region that includes the incoming
1703  * ram_addr_t.
1704  */
1705 void *qemu_get_ram_ptr(ram_addr_t addr)
1706 {
1707     RAMBlock *block;
1708     void *ptr;
1709
1710     rcu_read_lock();
1711     block = qemu_get_ram_block(addr);
1712
1713     if (xen_enabled() && block->host == NULL) {
1714         /* We need to check if the requested address is in the RAM
1715          * because we don't want to map the entire memory in QEMU.
1716          * In that case just map until the end of the page.
1717          */
1718         if (block->offset == 0) {
1719             ptr = xen_map_cache(addr, 0, 0);
1720             goto unlock;
1721         }
1722
1723         block->host = xen_map_cache(block->offset, block->max_length, 1);
1724     }
1725     ptr = ramblock_ptr(block, addr - block->offset);
1726
1727 unlock:
1728     rcu_read_unlock();
1729     return ptr;
1730 }
1731
1732 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1733  * but takes a size argument.
1734  *
1735  * By the time this function returns, the returned pointer is not protected
1736  * by RCU anymore.  If the caller is not within an RCU critical section and
1737  * does not hold the iothread lock, it must have other means of protecting the
1738  * pointer, such as a reference to the region that includes the incoming
1739  * ram_addr_t.
1740  */
1741 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1742 {
1743     void *ptr;
1744     if (*size == 0) {
1745         return NULL;
1746     }
1747     if (xen_enabled()) {
1748         return xen_map_cache(addr, *size, 1);
1749     } else {
1750         RAMBlock *block;
1751         rcu_read_lock();
1752         QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1753             if (addr - block->offset < block->max_length) {
1754                 if (addr - block->offset + *size > block->max_length)
1755                     *size = block->max_length - addr + block->offset;
1756                 ptr = ramblock_ptr(block, addr - block->offset);
1757                 rcu_read_unlock();
1758                 return ptr;
1759             }
1760         }
1761
1762         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1763         abort();
1764     }
1765 }
1766
1767 /* Some of the softmmu routines need to translate from a host pointer
1768  * (typically a TLB entry) back to a ram offset.
1769  *
1770  * By the time this function returns, the returned pointer is not protected
1771  * by RCU anymore.  If the caller is not within an RCU critical section and
1772  * does not hold the iothread lock, it must have other means of protecting the
1773  * pointer, such as a reference to the region that includes the incoming
1774  * ram_addr_t.
1775  */
1776 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1777 {
1778     RAMBlock *block;
1779     uint8_t *host = ptr;
1780     MemoryRegion *mr;
1781
1782     if (xen_enabled()) {
1783         rcu_read_lock();
1784         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1785         mr = qemu_get_ram_block(*ram_addr)->mr;
1786         rcu_read_unlock();
1787         return mr;
1788     }
1789
1790     rcu_read_lock();
1791     block = atomic_rcu_read(&ram_list.mru_block);
1792     if (block && block->host && host - block->host < block->max_length) {
1793         goto found;
1794     }
1795
1796     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1797         /* This case append when the block is not mapped. */
1798         if (block->host == NULL) {
1799             continue;
1800         }
1801         if (host - block->host < block->max_length) {
1802             goto found;
1803         }
1804     }
1805
1806     rcu_read_unlock();
1807     return NULL;
1808
1809 found:
1810     *ram_addr = block->offset + (host - block->host);
1811     mr = block->mr;
1812     rcu_read_unlock();
1813     return mr;
1814 }
1815
1816 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1817                                uint64_t val, unsigned size)
1818 {
1819     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1820         tb_invalidate_phys_page_fast(ram_addr, size);
1821     }
1822     switch (size) {
1823     case 1:
1824         stb_p(qemu_get_ram_ptr(ram_addr), val);
1825         break;
1826     case 2:
1827         stw_p(qemu_get_ram_ptr(ram_addr), val);
1828         break;
1829     case 4:
1830         stl_p(qemu_get_ram_ptr(ram_addr), val);
1831         break;
1832     default:
1833         abort();
1834     }
1835     cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1836     /* we remove the notdirty callback only if the code has been
1837        flushed */
1838     if (!cpu_physical_memory_is_clean(ram_addr)) {
1839         CPUArchState *env = current_cpu->env_ptr;
1840         tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1841     }
1842 }
1843
1844 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1845                                  unsigned size, bool is_write)
1846 {
1847     return is_write;
1848 }
1849
1850 static const MemoryRegionOps notdirty_mem_ops = {
1851     .write = notdirty_mem_write,
1852     .valid.accepts = notdirty_mem_accepts,
1853     .endianness = DEVICE_NATIVE_ENDIAN,
1854 };
1855
/* Generate a debug exception if a watchpoint has been hit.
 *
 * offset: offset of the access within its target page; it is added to the
 *         page part of cpu->mem_io_vaddr to rebuild the virtual address.
 * len:    length of the access, as passed on by the watch_mem_* handlers.
 * flags:  access type (BP_MEM_READ or BP_MEM_WRITE from the callers in this
 *         file); matched against each watchpoint's flags.
 *
 * Operates on current_cpu.
 */
static void check_watchpoint(int offset, int len, int flags)
{
    CPUState *cpu = current_cpu;
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    /* Rebuild the accessed virtual address from the page of the in-flight
     * access and the in-page offset supplied by the caller. */
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            /* Record on the watchpoint which kind of access hit it. */
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            /* Only the first matching watchpoint is stored in
             * cpu->watchpoint_hit and triggers the exit paths below. */
            if (!cpu->watchpoint_hit) {
                cpu->watchpoint_hit = wp;
                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    /* NOTE(review): cpu_loop_exit() presumably does not
                     * return here -- confirm. */
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    /* Generate code for the current CPU state and resume;
                     * the re-entry case at the top of this function then
                     * raises the debug interrupt.
                     * NOTE(review): the final argument 1 to tb_gen_code()
                     * is assumed to limit the new TB -- confirm. */
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(cpu, NULL);
                }
            }
        } else {
            /* Not hit by this access: clear any stale hit marker. */
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
1900
1901 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1902    so these check for a hit then pass through to the normal out-of-line
1903    phys routines.  */
1904 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1905                                unsigned size)
1906 {
1907     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1908     switch (size) {
1909     case 1: return ldub_phys(&address_space_memory, addr);
1910     case 2: return lduw_phys(&address_space_memory, addr);
1911     case 4: return ldl_phys(&address_space_memory, addr);
1912     default: abort();
1913     }
1914 }
1915
1916 static void watch_mem_write(void *opaque, hwaddr addr,
1917                             uint64_t val, unsigned size)
1918 {
1919     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1920     switch (size) {
1921     case 1:
1922         stb_phys(&address_space_memory, addr, val);
1923         break;
1924     case 2:
1925         stw_phys(&address_space_memory, addr, val);
1926         break;
1927     case 4:
1928         stl_phys(&address_space_memory, addr, val);
1929         break;
1930     default: abort();
1931     }
1932 }
1933
1934 static const MemoryRegionOps watch_mem_ops = {
1935     .read = watch_mem_read,
1936     .write = watch_mem_write,
1937     .endianness = DEVICE_NATIVE_ENDIAN,
1938 };
1939
1940 static uint64_t subpage_read(void *opaque, hwaddr addr,
1941                              unsigned len)
1942 {
1943     subpage_t *subpage = opaque;
1944     uint8_t buf[8];
1945
1946 #if defined(DEBUG_SUBPAGE)
1947     printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1948            subpage, len, addr);
1949 #endif
1950     address_space_read(subpage->as, addr + subpage->base, buf, len);
1951     switch (len) {
1952     case 1:
1953         return ldub_p(buf);
1954     case 2:
1955         return lduw_p(buf);
1956     case 4:
1957         return ldl_p(buf);
1958     case 8:
1959         return ldq_p(buf);
1960     default:
1961         abort();
1962     }
1963 }
1964
1965 static void subpage_write(void *opaque, hwaddr addr,
1966                           uint64_t value, unsigned len)
1967 {
1968     subpage_t *subpage = opaque;
1969     uint8_t buf[8];
1970
1971 #if defined(DEBUG_SUBPAGE)
1972     printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1973            " value %"PRIx64"\n",
1974            __func__, subpage, len, addr, value);
1975 #endif
1976     switch (len) {
1977     case 1:
1978         stb_p(buf, value);
1979         break;
1980     case 2:
1981         stw_p(buf, value);
1982         break;
1983     case 4:
1984         stl_p(buf, value);
1985         break;
1986     case 8:
1987         stq_p(buf, value);
1988         break;
1989     default:
1990         abort();
1991     }
1992     address_space_write(subpage->as, addr + subpage->base, buf, len);
1993 }
1994
1995 static bool subpage_accepts(void *opaque, hwaddr addr,
1996                             unsigned len, bool is_write)
1997 {
1998     subpage_t *subpage = opaque;
1999 #if defined(DEBUG_SUBPAGE)
2000     printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2001            __func__, subpage, is_write ? 'w' : 'r', len, addr);
2002 #endif
2003
2004     return address_space_access_valid(subpage->as, addr + subpage->base,
2005                                       len, is_write);
2006 }
2007
2008 static const MemoryRegionOps subpage_ops = {
2009     .read = subpage_read,
2010     .write = subpage_write,
2011     .impl.min_access_size = 1,
2012     .impl.max_access_size = 8,
2013     .valid.min_access_size = 1,
2014     .valid.max_access_size = 8,
2015     .valid.accepts = subpage_accepts,
2016     .endianness = DEVICE_NATIVE_ENDIAN,
2017 };
2018
2019 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2020                              uint16_t section)
2021 {
2022     int idx, eidx;
2023
2024     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2025         return -1;
2026     idx = SUBPAGE_IDX(start);
2027     eidx = SUBPAGE_IDX(end);
2028 #if defined(DEBUG_SUBPAGE)
2029     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2030            __func__, mmio, start, end, idx, eidx, section);
2031 #endif
2032     for (; idx <= eidx; idx++) {
2033         mmio->sub_section[idx] = section;
2034     }
2035
2036     return 0;
2037 }
2038
2039 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2040 {
2041     subpage_t *mmio;
2042
2043     mmio = g_malloc0(sizeof(subpage_t));
2044
2045     mmio->as = as;
2046     mmio->base = base;
2047     memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2048                           NULL, TARGET_PAGE_SIZE);
2049     mmio->iomem.subpage = true;
2050 #if defined(DEBUG_SUBPAGE)
2051     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2052            mmio, base, TARGET_PAGE_SIZE);
2053 #endif
2054     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2055
2056     return mmio;
2057 }
2058
2059 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2060                               MemoryRegion *mr)
2061 {
2062     assert(as);
2063     MemoryRegionSection section = {
2064         .address_space = as,
2065         .mr = mr,
2066         .offset_within_address_space = 0,
2067         .offset_within_region = 0,
2068         .size = int128_2_64(),
2069     };
2070
2071     return phys_section_add(map, &section);
2072 }
2073
2074 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2075 {
2076     AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2077     MemoryRegionSection *sections = d->map.sections;
2078
2079     return sections[index & ~TARGET_PAGE_MASK].mr;
2080 }
2081
2082 static void io_mem_init(void)
2083 {
2084     memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2085     memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2086                           NULL, UINT64_MAX);
2087     memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2088                           NULL, UINT64_MAX);
2089     memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2090                           NULL, UINT64_MAX);
2091 }
2092
2093 static void mem_begin(MemoryListener *listener)
2094 {
2095     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2096     AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2097     uint16_t n;
2098
2099     n = dummy_section(&d->map, as, &io_mem_unassigned);
2100     assert(n == PHYS_SECTION_UNASSIGNED);
2101     n = dummy_section(&d->map, as, &io_mem_notdirty);
2102     assert(n == PHYS_SECTION_NOTDIRTY);
2103     n = dummy_section(&d->map, as, &io_mem_rom);
2104     assert(n == PHYS_SECTION_ROM);
2105     n = dummy_section(&d->map, as, &io_mem_watch);
2106     assert(n == PHYS_SECTION_WATCH);
2107
2108     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2109     d->as = as;
2110     as->next_dispatch = d;
2111 }
2112
2113 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2114 {
2115     phys_sections_free(&d->map);
2116     g_free(d);
2117 }
2118
2119 static void mem_commit(MemoryListener *listener)
2120 {
2121     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2122     AddressSpaceDispatch *cur = as->dispatch;
2123     AddressSpaceDispatch *next = as->next_dispatch;
2124
2125     phys_page_compact_all(next, next->map.nodes_nb);
2126
2127     atomic_rcu_set(&as->dispatch, next);
2128     if (cur) {
2129         call_rcu(cur, address_space_dispatch_free, rcu);
2130     }
2131 }
2132
2133 static void tcg_commit(MemoryListener *listener)
2134 {
2135     CPUState *cpu;
2136
2137     /* since each CPU stores ram addresses in its TLB cache, we must
2138        reset the modified entries */
2139     /* XXX: slow ! */
2140     CPU_FOREACH(cpu) {
2141         /* FIXME: Disentangle the cpu.h circular files deps so we can
2142            directly get the right CPU from listener.  */
2143         if (cpu->tcg_as_listener != listener) {
2144             continue;
2145         }
2146         cpu_reload_memory_map(cpu);
2147     }
2148 }
2149
2150 static void core_log_global_start(MemoryListener *listener)
2151 {
2152     cpu_physical_memory_set_dirty_tracking(true);
2153 }
2154
2155 static void core_log_global_stop(MemoryListener *listener)
2156 {
2157     cpu_physical_memory_set_dirty_tracking(false);
2158 }
2159
2160 static MemoryListener core_memory_listener = {
2161     .log_global_start = core_log_global_start,
2162     .log_global_stop = core_log_global_stop,
2163     .priority = 1,
2164 };
2165
2166 void address_space_init_dispatch(AddressSpace *as)
2167 {
2168     as->dispatch = NULL;
2169     as->dispatch_listener = (MemoryListener) {
2170         .begin = mem_begin,
2171         .commit = mem_commit,
2172         .region_add = mem_add,
2173         .region_nop = mem_add,
2174         .priority = 0,
2175     };
2176     memory_listener_register(&as->dispatch_listener, as);
2177 }
2178
2179 void address_space_unregister(AddressSpace *as)
2180 {
2181     memory_listener_unregister(&as->dispatch_listener);
2182 }
2183
2184 void address_space_destroy_dispatch(AddressSpace *as)
2185 {
2186     AddressSpaceDispatch *d = as->dispatch;
2187
2188     atomic_rcu_set(&as->dispatch, NULL);
2189     if (d) {
2190         call_rcu(d, address_space_dispatch_free, rcu);
2191     }
2192 }
2193
2194 static void memory_map_init(void)
2195 {
2196     system_memory = g_malloc(sizeof(*system_memory));
2197
2198     memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2199     address_space_init(&address_space_memory, system_memory, "memory");
2200
2201     system_io = g_malloc(sizeof(*system_io));
2202     memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2203                           65536);
2204     address_space_init(&address_space_io, system_io, "I/O");
2205
2206     memory_listener_register(&core_memory_listener, &address_space_memory);
2207 }
2208
2209 MemoryRegion *get_system_memory(void)
2210 {
2211     return system_memory;
2212 }
2213
2214 MemoryRegion *get_system_io(void)
2215 {
2216     return system_io;
2217 }
2218
2219 #endif /* !defined(CONFIG_USER_ONLY) */
2220
2221 /* physical memory access (slow version, mainly for debug) */
2222 #if defined(CONFIG_USER_ONLY)
2223 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2224                         uint8_t *buf, int len, int is_write)
2225 {
2226     int l, flags;
2227     target_ulong page;
2228     void * p;
2229
2230     while (len > 0) {
2231         page = addr & TARGET_PAGE_MASK;
2232         l = (page + TARGET_PAGE_SIZE) - addr;
2233         if (l > len)
2234             l = len;
2235         flags = page_get_flags(page);
2236         if (!(flags & PAGE_VALID))
2237             return -1;
2238         if (is_write) {
2239             if (!(flags & PAGE_WRITE))
2240                 return -1;
2241             /* XXX: this code should not depend on lock_user */
2242             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2243                 return -1;
2244             memcpy(p, buf, l);
2245             unlock_user(p, addr, l);
2246         } else {
2247             if (!(flags & PAGE_READ))
2248                 return -1;
2249             /* XXX: this code should not depend on lock_user */
2250             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2251                 return -1;
2252             memcpy(buf, p, l);
2253             unlock_user(p, addr, 0);
2254         }
2255         len -= l;
2256         buf += l;
2257         addr += l;
2258     }
2259     return 0;
2260 }
2261
2262 #else
2263
2264 static void invalidate_and_set_dirty(hwaddr addr,
2265                                      hwaddr length)
2266 {
2267     if (cpu_physical_memory_range_includes_clean(addr, length)) {
2268         tb_invalidate_phys_range(addr, addr + length, 0);
2269         cpu_physical_memory_set_dirty_range_nocode(addr, length);
2270     }
2271     xen_modified_memory(addr, length);
2272 }
2273
2274 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2275 {
2276     unsigned access_size_max = mr->ops->valid.max_access_size;
2277
2278     /* Regions are assumed to support 1-4 byte accesses unless
2279        otherwise specified.  */
2280     if (access_size_max == 0) {
2281         access_size_max = 4;
2282     }
2283
2284     /* Bound the maximum access by the alignment of the address.  */
2285     if (!mr->ops->impl.unaligned) {
2286         unsigned align_size_max = addr & -addr;
2287         if (align_size_max != 0 && align_size_max < access_size_max) {
2288             access_size_max = align_size_max;
2289         }
2290     }
2291
2292     /* Don't attempt accesses larger than the maximum.  */
2293     if (l > access_size_max) {
2294         l = access_size_max;
2295     }
2296     if (l & (l - 1)) {
2297         l = 1 << (qemu_fls(l) - 1);
2298     }
2299
2300     return l;
2301 }
2302
/* Copy len bytes between buf and address addr of address space as.
 * is_write selects the direction: true copies from buf into the address
 * space, false copies from the address space into buf.
 *
 * The range is processed in chunks.  Each iteration translates the current
 * address and handles at most the length address_space_translate() hands
 * back in l.  Regions for which memory_access_is_direct() holds are copied
 * with memcpy() through qemu_get_ram_ptr(); all others go through
 * io_mem_read()/io_mem_write() using a single access of 1, 2, 4 or 8 bytes
 * chosen by memory_access_size().
 *
 * Returns the OR of all io_mem_read()/io_mem_write() results (false when
 * none was needed).  A len <= 0 performs no access.
 */
bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    bool error = false;

    while (len > 0) {
        /* Ask for the whole remainder; translate may shrink l. */
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force current_cpu to NULL to avoid
                   potential bugs */
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
                    error |= io_mem_write(mr, addr1, val, 8);
                    break;
                case 4:
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    error |= io_mem_write(mr, addr1, val, 4);
                    break;
                case 2:
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    error |= io_mem_write(mr, addr1, val, 2);
                    break;
                case 1:
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    error |= io_mem_write(mr, addr1, val, 1);
                    break;
                default:
                    abort();
                }
            } else {
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                /* Keep translated code and dirty tracking in sync with
                 * the bytes just written. */
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                l = memory_access_size(mr, l, addr1);
                switch (l) {
                case 8:
                    /* 64 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 8);
                    stq_p(buf, val);
                    break;
                case 4:
                    /* 32 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 4);
                    stl_p(buf, val);
                    break;
                case 2:
                    /* 16 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 2);
                    stw_p(buf, val);
                    break;
                case 1:
                    /* 8 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 1);
                    stb_p(buf, val);
                    break;
                default:
                    abort();
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        /* Advance past the chunk handled in this iteration. */
        len -= l;
        buf += l;
        addr += l;
    }

    return error;
}
2394
2395 bool address_space_write(AddressSpace *as, hwaddr addr,
2396                          const uint8_t *buf, int len)
2397 {
2398     return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2399 }
2400
2401 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2402 {
2403     return address_space_rw(as, addr, buf, len, false);
2404 }
2405
2406
2407 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2408                             int len, int is_write)
2409 {
2410     address_space_rw(&address_space_memory, addr, buf, len, is_write);
2411 }
2412
/* Operation selector for cpu_physical_memory_write_rom_internal().  */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE = 1,    /* flush the host icache over the range */
};
2417
2418 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2419     hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2420 {
2421     hwaddr l;
2422     uint8_t *ptr;
2423     hwaddr addr1;
2424     MemoryRegion *mr;
2425
2426     while (len > 0) {
2427         l = len;
2428         mr = address_space_translate(as, addr, &addr1, &l, true);
2429
2430         if (!(memory_region_is_ram(mr) ||
2431               memory_region_is_romd(mr))) {
2432             /* do nothing */
2433         } else {
2434             addr1 += memory_region_get_ram_addr(mr);
2435             /* ROM/RAM case */
2436             ptr = qemu_get_ram_ptr(addr1);
2437             switch (type) {
2438             case WRITE_DATA:
2439                 memcpy(ptr, buf, l);
2440                 invalidate_and_set_dirty(addr1, l);
2441                 break;
2442             case FLUSH_CACHE:
2443                 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2444                 break;
2445             }
2446         }
2447         len -= l;
2448         buf += l;
2449         addr += l;
2450     }
2451 }
2452
2453 /* used for ROM loading : can write in RAM and ROM */
2454 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2455                                    const uint8_t *buf, int len)
2456 {
2457     cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2458 }
2459
2460 void cpu_flush_icache_range(hwaddr start, int len)
2461 {
2462     /*
2463      * This function should do the same thing as an icache flush that was
2464      * triggered from within the guest. For TCG we are always cache coherent,
2465      * so there is no need to flush anything. For KVM / Xen we need to flush
2466      * the host's instruction cache at least.
2467      */
2468     if (tcg_enabled()) {
2469         return;
2470     }
2471
2472     cpu_physical_memory_write_rom_internal(&address_space_memory,
2473                                            start, NULL, len, FLUSH_CACHE);
2474 }
2475
/* State of the bounce buffer that address_space_map() hands out when the
 * target region cannot be accessed directly. */
typedef struct {
    /* Region referenced by address_space_map() for the lifetime of the
     * mapping; unreferenced again in address_space_unmap(). */
    MemoryRegion *mr;
    /* Buffer returned to the caller; NULL while no bounce mapping exists. */
    void *buffer;
    /* Address in the address space that the buffer stands in for. */
    hwaddr addr;
    /* Number of bytes the buffer covers. */
    hwaddr len;
} BounceBuffer;

/* The single bounce buffer: address_space_map() returns NULL for an
 * indirect mapping while bounce.buffer is already set. */
static BounceBuffer bounce;
2484
/* A party waiting to be told that retrying address_space_map() may now
 * succeed; see cpu_register_map_client(). */
typedef struct MapClient {
    /* Argument passed back to callback. */
    void *opaque;
    /* Invoked from cpu_notify_map_clients(). */
    void (*callback)(void *opaque);
    /* Linkage in map_client_list. */
    QLIST_ENTRY(MapClient) link;
} MapClient;

/* All currently registered map clients, most recently registered first. */
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
2493
2494 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2495 {
2496     MapClient *client = g_malloc(sizeof(*client));
2497
2498     client->opaque = opaque;
2499     client->callback = callback;
2500     QLIST_INSERT_HEAD(&map_client_list, client, link);
2501     return client;
2502 }
2503
2504 static void cpu_unregister_map_client(void *_client)
2505 {
2506     MapClient *client = (MapClient *)_client;
2507
2508     QLIST_REMOVE(client, link);
2509     g_free(client);
2510 }
2511
2512 static void cpu_notify_map_clients(void)
2513 {
2514     MapClient *client;
2515
2516     while (!QLIST_EMPTY(&map_client_list)) {
2517         client = QLIST_FIRST(&map_client_list);
2518         client->callback(client->opaque);
2519         cpu_unregister_map_client(client);
2520     }
2521 }
2522
2523 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2524 {
2525     MemoryRegion *mr;
2526     hwaddr l, xlat;
2527
2528     while (len > 0) {
2529         l = len;
2530         mr = address_space_translate(as, addr, &xlat, &l, is_write);
2531         if (!memory_access_is_direct(mr, is_write)) {
2532             l = memory_access_size(mr, l, addr);
2533             if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2534                 return false;
2535             }
2536         }
2537
2538         len -= l;
2539         addr += l;
2540     }
2541     return true;
2542 }
2543
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 *
 * A request with *plen == 0 returns NULL.  When the first region cannot be
 * accessed directly, the single global bounce buffer is used instead: at
 * most TARGET_PAGE_SIZE bytes are mapped, and NULL is returned if the
 * bounce buffer is already in use.  On success the mapped region is
 * referenced with memory_region_ref().
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        /* Indirect region: fall back to the bounce buffer, if it is free. */
        if (bounce.buffer) {
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        /* For a read mapping, fill the buffer now; for a write mapping the
         * data is written back in address_space_unmap(). */
        if (!is_write) {
            address_space_read(as, addr, bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    /* Extend the mapping while successive translations stay in the same
     * region and continue exactly where the previous chunk ended. */
    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    /* qemu_ram_ptr_length() receives plen and may adjust the length. */
    return qemu_ram_ptr_length(raddr + base, plen);
}
2610
2611 /* Unmaps a memory region previously mapped by address_space_map().
2612  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2613  * the amount of memory that was actually read or written by the caller.
2614  */
2615 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2616                          int is_write, hwaddr access_len)
2617 {
2618     if (buffer != bounce.buffer) {
2619         MemoryRegion *mr;
2620         ram_addr_t addr1;
2621
2622         mr = qemu_ram_addr_from_host(buffer, &addr1);
2623         assert(mr != NULL);
2624         if (is_write) {
2625             invalidate_and_set_dirty(addr1, access_len);
2626         }
2627         if (xen_enabled()) {
2628             xen_invalidate_map_cache_entry(buffer);
2629         }
2630         memory_region_unref(mr);
2631         return;
2632     }
2633     if (is_write) {
2634         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2635     }
2636     qemu_vfree(bounce.buffer);
2637     bounce.buffer = NULL;
2638     memory_region_unref(bounce.mr);
2639     cpu_notify_map_clients();
2640 }
2641
2642 void *cpu_physical_memory_map(hwaddr addr,
2643                               hwaddr *plen,
2644                               int is_write)
2645 {
2646     return address_space_map(&address_space_memory, addr, plen, is_write);
2647 }
2648
2649 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2650                                int is_write, hwaddr access_len)
2651 {
2652     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2653 }
2654
2655 /* warning: addr must be aligned */
2656 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2657                                          enum device_endian endian)
2658 {
2659     uint8_t *ptr;
2660     uint64_t val;
2661     MemoryRegion *mr;
2662     hwaddr l = 4;
2663     hwaddr addr1;
2664
2665     mr = address_space_translate(as, addr, &addr1, &l, false);
2666     if (l < 4 || !memory_access_is_direct(mr, false)) {
2667         /* I/O case */
2668         io_mem_read(mr, addr1, &val, 4);
2669 #if defined(TARGET_WORDS_BIGENDIAN)
2670         if (endian == DEVICE_LITTLE_ENDIAN) {
2671             val = bswap32(val);
2672         }
2673 #else
2674         if (endian == DEVICE_BIG_ENDIAN) {
2675             val = bswap32(val);
2676         }
2677 #endif
2678     } else {
2679         /* RAM case */
2680         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2681                                 & TARGET_PAGE_MASK)
2682                                + addr1);
2683         switch (endian) {
2684         case DEVICE_LITTLE_ENDIAN:
2685             val = ldl_le_p(ptr);
2686             break;
2687         case DEVICE_BIG_ENDIAN:
2688             val = ldl_be_p(ptr);
2689             break;
2690         default:
2691             val = ldl_p(ptr);
2692             break;
2693         }
2694     }
2695     return val;
2696 }
2697
2698 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2699 {
2700     return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2701 }
2702
2703 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2704 {
2705     return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2706 }
2707
2708 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2709 {
2710     return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2711 }
2712
2713 /* warning: addr must be aligned */
2714 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2715                                          enum device_endian endian)
2716 {
2717     uint8_t *ptr;
2718     uint64_t val;
2719     MemoryRegion *mr;
2720     hwaddr l = 8;
2721     hwaddr addr1;
2722
2723     mr = address_space_translate(as, addr, &addr1, &l,
2724                                  false);
2725     if (l < 8 || !memory_access_is_direct(mr, false)) {
2726         /* I/O case */
2727         io_mem_read(mr, addr1, &val, 8);
2728 #if defined(TARGET_WORDS_BIGENDIAN)
2729         if (endian == DEVICE_LITTLE_ENDIAN) {
2730             val = bswap64(val);
2731         }
2732 #else
2733         if (endian == DEVICE_BIG_ENDIAN) {
2734             val = bswap64(val);
2735         }
2736 #endif
2737     } else {
2738         /* RAM case */
2739         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2740                                 & TARGET_PAGE_MASK)
2741                                + addr1);
2742         switch (endian) {
2743         case DEVICE_LITTLE_ENDIAN:
2744             val = ldq_le_p(ptr);
2745             break;
2746         case DEVICE_BIG_ENDIAN:
2747             val = ldq_be_p(ptr);
2748             break;
2749         default:
2750             val = ldq_p(ptr);
2751             break;
2752         }
2753     }
2754     return val;
2755 }
2756
2757 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2758 {
2759     return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2760 }
2761
2762 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2763 {
2764     return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2765 }
2766
2767 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2768 {
2769     return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2770 }
2771
2772 /* XXX: optimize */
2773 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2774 {
2775     uint8_t val;
2776     address_space_rw(as, addr, &val, 1, 0);
2777     return val;
2778 }
2779
2780 /* warning: addr must be aligned */
2781 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2782                                           enum device_endian endian)
2783 {
2784     uint8_t *ptr;
2785     uint64_t val;
2786     MemoryRegion *mr;
2787     hwaddr l = 2;
2788     hwaddr addr1;
2789
2790     mr = address_space_translate(as, addr, &addr1, &l,
2791                                  false);
2792     if (l < 2 || !memory_access_is_direct(mr, false)) {
2793         /* I/O case */
2794         io_mem_read(mr, addr1, &val, 2);
2795 #if defined(TARGET_WORDS_BIGENDIAN)
2796         if (endian == DEVICE_LITTLE_ENDIAN) {
2797             val = bswap16(val);
2798         }
2799 #else
2800         if (endian == DEVICE_BIG_ENDIAN) {
2801             val = bswap16(val);
2802         }
2803 #endif
2804     } else {
2805         /* RAM case */
2806         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2807                                 & TARGET_PAGE_MASK)
2808                                + addr1);
2809         switch (endian) {
2810         case DEVICE_LITTLE_ENDIAN:
2811             val = lduw_le_p(ptr);
2812             break;
2813         case DEVICE_BIG_ENDIAN:
2814             val = lduw_be_p(ptr);
2815             break;
2816         default:
2817             val = lduw_p(ptr);
2818             break;
2819         }
2820     }
2821     return val;
2822 }
2823
2824 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2825 {
2826     return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2827 }
2828
2829 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2830 {
2831     return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2832 }
2833
2834 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2835 {
2836     return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2837 }
2838
2839 /* warning: addr must be aligned. The ram page is not masked as dirty
2840    and the code inside is not invalidated. It is useful if the dirty
2841    bits are used to track modified PTEs */
2842 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2843 {
2844     uint8_t *ptr;
2845     MemoryRegion *mr;
2846     hwaddr l = 4;
2847     hwaddr addr1;
2848
2849     mr = address_space_translate(as, addr, &addr1, &l,
2850                                  true);
2851     if (l < 4 || !memory_access_is_direct(mr, true)) {
2852         io_mem_write(mr, addr1, val, 4);
2853     } else {
2854         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2855         ptr = qemu_get_ram_ptr(addr1);
2856         stl_p(ptr, val);
2857
2858         if (unlikely(in_migration)) {
2859             if (cpu_physical_memory_is_clean(addr1)) {
2860                 /* invalidate code */
2861                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2862                 /* set dirty bit */
2863                 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2864             }
2865         }
2866     }
2867 }
2868
2869 /* warning: addr must be aligned */
2870 static inline void stl_phys_internal(AddressSpace *as,
2871                                      hwaddr addr, uint32_t val,
2872                                      enum device_endian endian)
2873 {
2874     uint8_t *ptr;
2875     MemoryRegion *mr;
2876     hwaddr l = 4;
2877     hwaddr addr1;
2878
2879     mr = address_space_translate(as, addr, &addr1, &l,
2880                                  true);
2881     if (l < 4 || !memory_access_is_direct(mr, true)) {
2882 #if defined(TARGET_WORDS_BIGENDIAN)
2883         if (endian == DEVICE_LITTLE_ENDIAN) {
2884             val = bswap32(val);
2885         }
2886 #else
2887         if (endian == DEVICE_BIG_ENDIAN) {
2888             val = bswap32(val);
2889         }
2890 #endif
2891         io_mem_write(mr, addr1, val, 4);
2892     } else {
2893         /* RAM case */
2894         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2895         ptr = qemu_get_ram_ptr(addr1);
2896         switch (endian) {
2897         case DEVICE_LITTLE_ENDIAN:
2898             stl_le_p(ptr, val);
2899             break;
2900         case DEVICE_BIG_ENDIAN:
2901             stl_be_p(ptr, val);
2902             break;
2903         default:
2904             stl_p(ptr, val);
2905             break;
2906         }
2907         invalidate_and_set_dirty(addr1, 4);
2908     }
2909 }
2910
2911 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2912 {
2913     stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2914 }
2915
2916 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2917 {
2918     stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2919 }
2920
2921 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2922 {
2923     stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2924 }
2925
2926 /* XXX: optimize */
2927 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2928 {
2929     uint8_t v = val;
2930     address_space_rw(as, addr, &v, 1, 1);
2931 }
2932
2933 /* warning: addr must be aligned */
2934 static inline void stw_phys_internal(AddressSpace *as,
2935                                      hwaddr addr, uint32_t val,
2936                                      enum device_endian endian)
2937 {
2938     uint8_t *ptr;
2939     MemoryRegion *mr;
2940     hwaddr l = 2;
2941     hwaddr addr1;
2942
2943     mr = address_space_translate(as, addr, &addr1, &l, true);
2944     if (l < 2 || !memory_access_is_direct(mr, true)) {
2945 #if defined(TARGET_WORDS_BIGENDIAN)
2946         if (endian == DEVICE_LITTLE_ENDIAN) {
2947             val = bswap16(val);
2948         }
2949 #else
2950         if (endian == DEVICE_BIG_ENDIAN) {
2951             val = bswap16(val);
2952         }
2953 #endif
2954         io_mem_write(mr, addr1, val, 2);
2955     } else {
2956         /* RAM case */
2957         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2958         ptr = qemu_get_ram_ptr(addr1);
2959         switch (endian) {
2960         case DEVICE_LITTLE_ENDIAN:
2961             stw_le_p(ptr, val);
2962             break;
2963         case DEVICE_BIG_ENDIAN:
2964             stw_be_p(ptr, val);
2965             break;
2966         default:
2967             stw_p(ptr, val);
2968             break;
2969         }
2970         invalidate_and_set_dirty(addr1, 2);
2971     }
2972 }
2973
2974 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2975 {
2976     stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2977 }
2978
2979 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2980 {
2981     stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2982 }
2983
2984 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2985 {
2986     stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2987 }
2988
2989 /* XXX: optimize */
2990 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2991 {
2992     val = tswap64(val);
2993     address_space_rw(as, addr, (void *) &val, 8, 1);
2994 }
2995
2996 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2997 {
2998     val = cpu_to_le64(val);
2999     address_space_rw(as, addr, (void *) &val, 8, 1);
3000 }
3001
3002 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3003 {
3004     val = cpu_to_be64(val);
3005     address_space_rw(as, addr, (void *) &val, 8, 1);
3006 }
3007
3008 /* virtual memory access for debug (includes writing to ROM) */
3009 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3010                         uint8_t *buf, int len, int is_write)
3011 {
3012     int l;
3013     hwaddr phys_addr;
3014     target_ulong page;
3015
3016     while (len > 0) {
3017         page = addr & TARGET_PAGE_MASK;
3018         phys_addr = cpu_get_phys_page_debug(cpu, page);
3019         /* if no physical page mapped, return an error */
3020         if (phys_addr == -1)
3021             return -1;
3022         l = (page + TARGET_PAGE_SIZE) - addr;
3023         if (l > len)
3024             l = len;
3025         phys_addr += (addr & ~TARGET_PAGE_MASK);
3026         if (is_write) {
3027             cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3028         } else {
3029             address_space_rw(cpu->as, phys_addr, buf, l, 0);
3030         }
3031         len -= l;
3032         buf += l;
3033         addr += l;
3034     }
3035     return 0;
3036 }
3037 #endif
3038
/*
 * A helper function for the _utterly broken_ virtio device model to find
 * out if it's running on a big endian machine.  Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at compile time,
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3052
3053 #ifndef CONFIG_USER_ONLY
3054 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3055 {
3056     MemoryRegion*mr;
3057     hwaddr l = 1;
3058
3059     mr = address_space_translate(&address_space_memory,
3060                                  phys_addr, &phys_addr, &l, false);
3061
3062     return !(memory_region_is_ram(mr) ||
3063              memory_region_is_romd(mr));
3064 }
3065
3066 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3067 {
3068     RAMBlock *block;
3069
3070     rcu_read_lock();
3071     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3072         func(block->host, block->offset, block->used_length, opaque);
3073     }
3074     rcu_read_unlock();
3075 }
3076 #endif