exec.c

   1 /*
   2  *  Virtual page mapping
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "config.h"
  20 #ifndef _WIN32
  21 #include <sys/types.h>
  22 #include <sys/mman.h>
  23 #endif
  24
  25 #include "qemu-common.h"
  26 #include "cpu.h"
  27 #include "tcg.h"
  28 #include "hw/hw.h"
  29 #if !defined(CONFIG_USER_ONLY)
  30 #include "hw/boards.h"
  31 #endif
  32 #include "hw/qdev.h"
  33 #include "qemu/osdep.h"
  34 #include "sysemu/kvm.h"
  35 #include "sysemu/sysemu.h"
  36 #include "hw/xen/xen.h"
  37 #include "qemu/timer.h"
  38 #include "qemu/config-file.h"
  39 #include "qemu/error-report.h"
  40 #include "exec/memory.h"
  41 #include "sysemu/dma.h"
  42 #include "exec/address-spaces.h"
  43 #if defined(CONFIG_USER_ONLY)
  44 #include <qemu.h>
  45 #else /* !CONFIG_USER_ONLY */
  46 #include "sysemu/xen-mapcache.h"
  47 #include "trace.h"
  48 #endif
  49 #include "exec/cpu-all.h"
  50 #include "qemu/rcu_queue.h"
  51 #include "exec/cputlb.h"
  52 #include "translate-all.h"
  53
  54 #include "exec/memory-internal.h"
  55 #include "exec/ram_addr.h"
  56
  57 #include "qemu/range.h"
  58
  59 //#define DEBUG_SUBPAGE
  60
  61 #if !defined(CONFIG_USER_ONLY)
  62 static bool in_migration;
  63
  64 /* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
  65  * are protected by the ramlist lock.
  66  */
  67 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
  68
  69 static MemoryRegion *system_memory;
  70 static MemoryRegion *system_io;
  71
  72 AddressSpace address_space_io;
  73 AddressSpace address_space_memory;
  74
  75 MemoryRegion io_mem_rom, io_mem_notdirty;
  76 static MemoryRegion io_mem_unassigned;
  77
  78 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
  79 #define RAM_PREALLOC   (1 << 0)
  80
  81 /* RAM is mmap-ed with MAP_SHARED */
  82 #define RAM_SHARED     (1 << 1)
  83
  84 /* Only a portion of RAM (used_length) is actually used, and migrated.
  85  * This used_length size can change across reboots.
  86  */
  87 #define RAM_RESIZEABLE (1 << 2)
  88
  89 #endif
  90
  91 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
  92 /* current CPU in the current thread. It is only valid inside
  93    cpu_exec() */
  94 DEFINE_TLS(CPUState *, current_cpu);
  95 /* 0 = Do not count executed instructions.
  96    1 = Precise instruction counting.
  97    2 = Adaptive rate instruction counting.  */
  98 int use_icount;
  99
 100 #if !defined(CONFIG_USER_ONLY)
 101
 102 typedef struct PhysPageEntry PhysPageEntry;
 103
 104 struct PhysPageEntry {
 105     /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
 106     uint32_t skip : 6;
 107      /* index into phys_sections (!skip) or phys_map_nodes (skip) */
 108     uint32_t ptr : 26;
 109 };
 110
 111 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
 112
 113 /* Size of the L2 (and L3, etc) page tables.  */
 114 #define ADDR_SPACE_BITS 64
 115
 116 #define P_L2_BITS 9
 117 #define P_L2_SIZE (1 << P_L2_BITS)
 118
 119 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
 120
 121 typedef PhysPageEntry Node[P_L2_SIZE];
 122
 123 typedef struct PhysPageMap {
 124     struct rcu_head rcu;
 125
 126     unsigned sections_nb;
 127     unsigned sections_nb_alloc;
 128     unsigned nodes_nb;
 129     unsigned nodes_nb_alloc;
 130     Node *nodes;
 131     MemoryRegionSection *sections;
 132 } PhysPageMap;
 133
 134 struct AddressSpaceDispatch {
 135     struct rcu_head rcu;
 136
 137     /* This is a multi-level map on the physical address space.
 138      * The bottom level has pointers to MemoryRegionSections.
 139      */
 140     PhysPageEntry phys_map;
 141     PhysPageMap map;
 142     AddressSpace *as;
 143 };
 144
 145 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 146 typedef struct subpage_t {
 147     MemoryRegion iomem;
 148     AddressSpace *as;
 149     hwaddr base;
 150     uint16_t sub_section[TARGET_PAGE_SIZE];
 151 } subpage_t;
 152
 153 #define PHYS_SECTION_UNASSIGNED 0
 154 #define PHYS_SECTION_NOTDIRTY 1
 155 #define PHYS_SECTION_ROM 2
 156 #define PHYS_SECTION_WATCH 3
 157
 158 static void io_mem_init(void);
 159 static void memory_map_init(void);
 160 static void tcg_commit(MemoryListener *listener);
 161
 162 static MemoryRegion io_mem_watch;
 163 #endif
 164
 165 #if !defined(CONFIG_USER_ONLY)
 166
 167 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 168 {
 169     if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 170         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
 171         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 172         map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 173     }
 174 }
 175
 176 static uint32_t phys_map_node_alloc(PhysPageMap *map)
 177 {
 178     unsigned i;
 179     uint32_t ret;
 180
 181     ret = map->nodes_nb++;
 182     assert(ret != PHYS_MAP_NODE_NIL);
 183     assert(ret != map->nodes_nb_alloc);
 184     for (i = 0; i < P_L2_SIZE; ++i) {
 185         map->nodes[ret][i].skip = 1;
 186         map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
 187     }
 188     return ret;
 189 }
 190
 191 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 192                                 hwaddr *index, hwaddr *nb, uint16_t leaf,
 193                                 int level)
 194 {
 195     PhysPageEntry *p;
 196     int i;
 197     hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 198
 199     if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 200         lp->ptr = phys_map_node_alloc(map);
 201         p = map->nodes[lp->ptr];
 202         if (level == 0) {
 203             for (i = 0; i < P_L2_SIZE; i++) {
 204                 p[i].skip = 0;
 205                 p[i].ptr = PHYS_SECTION_UNASSIGNED;
 206             }
 207         }
 208     } else {
 209         p = map->nodes[lp->ptr];
 210     }
 211     lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 212
 213     while (*nb && lp < &p[P_L2_SIZE]) {
 214         if ((*index & (step - 1)) == 0 && *nb >= step) {
 215             lp->skip = 0;
 216             lp->ptr = leaf;
 217             *index += step;
 218             *nb -= step;
 219         } else {
 220             phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 221         }
 222         ++lp;
 223     }
 224 }
 225
 226 static void phys_page_set(AddressSpaceDispatch *d,
 227                           hwaddr index, hwaddr nb,
 228                           uint16_t leaf)
 229 {
 230     /* Wildly overreserve - it doesn't matter much. */
 231     phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 232
 233     phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 234 }
 235
 236 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
 237  * and update our entry so we can skip it and go directly to the destination.
 238  */
 239 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
 240 {
 241     unsigned valid_ptr = P_L2_SIZE;
 242     int valid = 0;
 243     PhysPageEntry *p;
 244     int i;
 245
 246     if (lp->ptr == PHYS_MAP_NODE_NIL) {
 247         return;
 248     }
 249
 250     p = nodes[lp->ptr];
 251     for (i = 0; i < P_L2_SIZE; i++) {
 252         if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 253             continue;
 254         }
 255
 256         valid_ptr = i;
 257         valid++;
 258         if (p[i].skip) {
 259             phys_page_compact(&p[i], nodes, compacted);
 260         }
 261     }
 262
 263     /* We can only compress if there's only one child. */
 264     if (valid != 1) {
 265         return;
 266     }
 267
 268     assert(valid_ptr < P_L2_SIZE);
 269
 270     /* Don't compress if it won't fit in the # of bits we have. */
 271     if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 272         return;
 273     }
 274
 275     lp->ptr = p[valid_ptr].ptr;
 276     if (!p[valid_ptr].skip) {
 277         /* If our only child is a leaf, make this a leaf. */
 278         /* By design, we should have made this node a leaf to begin with so we
 279          * should never reach here.
 280          * But since it's so simple to handle this, let's do it just in case we
 281          * change this rule.
 282          */
 283         lp->skip = 0;
 284     } else {
 285         lp->skip += p[valid_ptr].skip;
 286     }
 287 }
 288
 289 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 290 {
 291     DECLARE_BITMAP(compacted, nodes_nb);
 292
 293     if (d->phys_map.skip) {
 294         phys_page_compact(&d->phys_map, d->map.nodes, compacted);
 295     }
 296 }
 297
 298 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 299                                            Node *nodes, MemoryRegionSection *sections)
 300 {
 301     PhysPageEntry *p;
 302     hwaddr index = addr >> TARGET_PAGE_BITS;
 303     int i;
 304
 305     for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 306         if (lp.ptr == PHYS_MAP_NODE_NIL) {
 307             return &sections[PHYS_SECTION_UNASSIGNED];
 308         }
 309         p = nodes[lp.ptr];
 310         lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 311     }
 312
 313     if (sections[lp.ptr].size.hi ||
 314         range_covers_byte(sections[lp.ptr].offset_within_address_space,
 315                           sections[lp.ptr].size.lo, addr)) {
 316         return &sections[lp.ptr];
 317     } else {
 318         return &sections[PHYS_SECTION_UNASSIGNED];
 319     }
 320 }
 321
 322 bool memory_region_is_unassigned(MemoryRegion *mr)
 323 {
 324     return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 325         && mr != &io_mem_watch;
 326 }
 327
 328 /* Called from RCU critical section */
 329 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 330                                                         hwaddr addr,
 331                                                         bool resolve_subpage)
 332 {
 333     MemoryRegionSection *section;
 334     subpage_t *subpage;
 335
 336     section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
 337     if (resolve_subpage && section->mr->subpage) {
 338         subpage = container_of(section->mr, subpage_t, iomem);
 339         section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 340     }
 341     return section;
 342 }
 343
 344 /* Called from RCU critical section */
 345 static MemoryRegionSection *
 346 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 347                                  hwaddr *plen, bool resolve_subpage)
 348 {
 349     MemoryRegionSection *section;
 350     Int128 diff;
 351
 352     section = address_space_lookup_region(d, addr, resolve_subpage);
 353     /* Compute offset within MemoryRegionSection */
 354     addr -= section->offset_within_address_space;
 355
 356     /* Compute offset within MemoryRegion */
 357     *xlat = addr + section->offset_within_region;
 358
 359     diff = int128_sub(section->mr->size, int128_make64(addr));
 360     *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 361     return section;
 362 }
 363
 364 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
 365 {
 366     if (memory_region_is_ram(mr)) {
 367         return !(is_write && mr->readonly);
 368     }
 369     if (memory_region_is_romd(mr)) {
 370         return !is_write;
 371     }
 372
 373     return false;
 374 }
 375
 376 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 377                                       hwaddr *xlat, hwaddr *plen,
 378                                       bool is_write)
 379 {
 380     IOMMUTLBEntry iotlb;
 381     MemoryRegionSection *section;
 382     MemoryRegion *mr;
 383     hwaddr len = *plen;
 384
 385     rcu_read_lock();
 386     for (;;) {
 387         AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 388         section = address_space_translate_internal(d, addr, &addr, plen, true);
 389         mr = section->mr;
 390
 391         if (!mr->iommu_ops) {
 392             break;
 393         }
 394
 395         iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 396         addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 397                 | (addr & iotlb.addr_mask));
 398         len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
 399         if (!(iotlb.perm & (1 << is_write))) {
 400             mr = &io_mem_unassigned;
 401             break;
 402         }
 403
 404         as = iotlb.target_as;
 405     }
 406
 407     if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 408         hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 409         len = MIN(page, len);
 410     }
 411
 412     *plen = len;
 413     *xlat = addr;
 414     rcu_read_unlock();
 415     return mr;
 416 }
 417
 418 /* Called from RCU critical section */
 419 MemoryRegionSection *
 420 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
 421                                   hwaddr *xlat, hwaddr *plen)
 422 {
 423     MemoryRegionSection *section;
 424     section = address_space_translate_internal(cpu->memory_dispatch,
 425                                                addr, xlat, plen, false);
 426
 427     assert(!section->mr->iommu_ops);
 428     return section;
 429 }
 430 #endif
 431
 432 void cpu_exec_init_all(void)
 433 {
 434 #if !defined(CONFIG_USER_ONLY)
 435     qemu_mutex_init(&ram_list.mutex);
 436     memory_map_init();
 437     io_mem_init();
 438 #endif
 439 }
 440
 441 #if !defined(CONFIG_USER_ONLY)
 442
 443 static int cpu_common_post_load(void *opaque, int version_id)
 444 {
 445     CPUState *cpu = opaque;
 446
 447     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 448        version_id is increased. */
 449     cpu->interrupt_request &= ~0x01;
 450     tlb_flush(cpu, 1);
 451
 452     return 0;
 453 }
 454
 455 static int cpu_common_pre_load(void *opaque)
 456 {
 457     CPUState *cpu = opaque;
 458
 459     cpu->exception_index = -1;
 460
 461     return 0;
 462 }
 463
 464 static bool cpu_common_exception_index_needed(void *opaque)
 465 {
 466     CPUState *cpu = opaque;
 467
 468     return tcg_enabled() && cpu->exception_index != -1;
 469 }
 470
 471 static const VMStateDescription vmstate_cpu_common_exception_index = {
 472     .name = "cpu_common/exception_index",
 473     .version_id = 1,
 474     .minimum_version_id = 1,
 475     .fields = (VMStateField[]) {
 476         VMSTATE_INT32(exception_index, CPUState),
 477         VMSTATE_END_OF_LIST()
 478     }
 479 };
 480
 481 const VMStateDescription vmstate_cpu_common = {
 482     .name = "cpu_common",
 483     .version_id = 1,
 484     .minimum_version_id = 1,
 485     .pre_load = cpu_common_pre_load,
 486     .post_load = cpu_common_post_load,
 487     .fields = (VMStateField[]) {
 488         VMSTATE_UINT32(halted, CPUState),
 489         VMSTATE_UINT32(interrupt_request, CPUState),
 490         VMSTATE_END_OF_LIST()
 491     },
 492     .subsections = (VMStateSubsection[]) {
 493         {
 494             .vmsd = &vmstate_cpu_common_exception_index,
 495             .needed = cpu_common_exception_index_needed,
 496         } , {
 497             /* empty */
 498         }
 499     }
 500 };
 501
 502 #endif
 503
 504 CPUState *qemu_get_cpu(int index)
 505 {
 506     CPUState *cpu;
 507
 508     CPU_FOREACH(cpu) {
 509         if (cpu->cpu_index == index) {
 510             return cpu;
 511         }
 512     }
 513
 514     return NULL;
 515 }
 516
 517 #if !defined(CONFIG_USER_ONLY)
 518 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
 519 {
 520     /* We only support one address space per cpu at the moment.  */
 521     assert(cpu->as == as);
 522
 523     if (cpu->tcg_as_listener) {
 524         memory_listener_unregister(cpu->tcg_as_listener);
 525     } else {
 526         cpu->tcg_as_listener = g_new0(MemoryListener, 1);
 527     }
 528     cpu->tcg_as_listener->commit = tcg_commit;
 529     memory_listener_register(cpu->tcg_as_listener, as);
 530 }
 531 #endif
 532
 533 void cpu_exec_init(CPUArchState *env)
 534 {
 535     CPUState *cpu = ENV_GET_CPU(env);
 536     CPUClass *cc = CPU_GET_CLASS(cpu);
 537     CPUState *some_cpu;
 538     int cpu_index;
 539
 540 #if defined(CONFIG_USER_ONLY)
 541     cpu_list_lock();
 542 #endif
 543     cpu_index = 0;
 544     CPU_FOREACH(some_cpu) {
 545         cpu_index++;
 546     }
 547     cpu->cpu_index = cpu_index;
 548     cpu->numa_node = 0;
 549     QTAILQ_INIT(&cpu->breakpoints);
 550     QTAILQ_INIT(&cpu->watchpoints);
 551 #ifndef CONFIG_USER_ONLY
 552     cpu->as = &address_space_memory;
 553     cpu->thread_id = qemu_get_thread_id();
 554 #endif
 555     QTAILQ_INSERT_TAIL(&cpus, cpu, node);
 556 #if defined(CONFIG_USER_ONLY)
 557     cpu_list_unlock();
 558 #endif
 559     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 560         vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
 561     }
 562 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 563     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
 564                     cpu_save, cpu_load, env);
 565     assert(cc->vmsd == NULL);
 566     assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
 567 #endif
 568     if (cc->vmsd != NULL) {
 569         vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
 570     }
 571 }
 572
 573 #if defined(CONFIG_USER_ONLY)
 574 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 575 {
 576     tb_invalidate_phys_page_range(pc, pc + 1, 0);
 577 }
 578 #else
 579 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 580 {
 581     hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
 582     if (phys != -1) {
 583         tb_invalidate_phys_addr(cpu->as,
 584                                 phys | (pc & ~TARGET_PAGE_MASK));
 585     }
 586 }
 587 #endif
 588
 589 #if defined(CONFIG_USER_ONLY)
 590 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 591
 592 {
 593 }
 594
 595 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 596                           int flags)
 597 {
 598     return -ENOSYS;
 599 }
 600
 601 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 602 {
 603 }
 604
 605 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 606                           int flags, CPUWatchpoint **watchpoint)
 607 {
 608     return -ENOSYS;
 609 }
 610 #else
 611 /* Add a watchpoint.  */
 612 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 613                           int flags, CPUWatchpoint **watchpoint)
 614 {
 615     CPUWatchpoint *wp;
 616
 617     /* forbid ranges which are empty or run off the end of the address space */
 618     if (len == 0 || (addr + len - 1) < addr) {
 619         error_report("tried to set invalid watchpoint at %"
 620                      VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 621         return -EINVAL;
 622     }
 623     wp = g_malloc(sizeof(*wp));
 624
 625     wp->vaddr = addr;
 626     wp->len = len;
 627     wp->flags = flags;
 628
 629     /* keep all GDB-injected watchpoints in front */
 630     if (flags & BP_GDB) {
 631         QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 632     } else {
 633         QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 634     }
 635
 636     tlb_flush_page(cpu, addr);
 637
 638     if (watchpoint)
 639         *watchpoint = wp;
 640     return 0;
 641 }
 642
 643 /* Remove a specific watchpoint.  */
 644 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 645                           int flags)
 646 {
 647     CPUWatchpoint *wp;
 648
 649     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 650         if (addr == wp->vaddr && len == wp->len
 651                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 652             cpu_watchpoint_remove_by_ref(cpu, wp);
 653             return 0;
 654         }
 655     }
 656     return -ENOENT;
 657 }
 658
 659 /* Remove a specific watchpoint by reference.  */
 660 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 661 {
 662     QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 663
 664     tlb_flush_page(cpu, watchpoint->vaddr);
 665
 666     g_free(watchpoint);
 667 }
 668
 669 /* Remove all matching watchpoints.  */
 670 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 671 {
 672     CPUWatchpoint *wp, *next;
 673
 674     QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 675         if (wp->flags & mask) {
 676             cpu_watchpoint_remove_by_ref(cpu, wp);
 677         }
 678     }
 679 }
 680
 681 /* Return true if this watchpoint address matches the specified
 682  * access (ie the address range covered by the watchpoint overlaps
 683  * partially or completely with the address range covered by the
 684  * access).
 685  */
 686 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 687                                                   vaddr addr,
 688                                                   vaddr len)
 689 {
 690     /* We know the lengths are non-zero, but a little caution is
 691      * required to avoid errors in the case where the range ends
 692      * exactly at the top of the address space and so addr + len
 693      * wraps round to zero.
 694      */
 695     vaddr wpend = wp->vaddr + wp->len - 1;
 696     vaddr addrend = addr + len - 1;
 697
 698     return !(addr > wpend || wp->vaddr > addrend);
 699 }
 700
 701 #endif
 702
 703 /* Add a breakpoint.  */
 704 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 705                           CPUBreakpoint **breakpoint)
 706 {
 707     CPUBreakpoint *bp;
 708
 709     bp = g_malloc(sizeof(*bp));
 710
 711     bp->pc = pc;
 712     bp->flags = flags;
 713
 714     /* keep all GDB-injected breakpoints in front */
 715     if (flags & BP_GDB) {
 716         QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 717     } else {
 718         QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 719     }
 720
 721     breakpoint_invalidate(cpu, pc);
 722
 723     if (breakpoint) {
 724         *breakpoint = bp;
 725     }
 726     return 0;
 727 }
 728
 729 /* Remove a specific breakpoint.  */
 730 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 731 {
 732     CPUBreakpoint *bp;
 733
 734     QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 735         if (bp->pc == pc && bp->flags == flags) {
 736             cpu_breakpoint_remove_by_ref(cpu, bp);
 737             return 0;
 738         }
 739     }
 740     return -ENOENT;
 741 }
 742
 743 /* Remove a specific breakpoint by reference.  */
 744 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 745 {
 746     QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 747
 748     breakpoint_invalidate(cpu, breakpoint->pc);
 749
 750     g_free(breakpoint);
 751 }
 752
 753 /* Remove all matching breakpoints. */
 754 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 755 {
 756     CPUBreakpoint *bp, *next;
 757
 758     QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 759         if (bp->flags & mask) {
 760             cpu_breakpoint_remove_by_ref(cpu, bp);
 761         }
 762     }
 763 }
 764
 765 /* enable or disable single step mode. EXCP_DEBUG is returned by the
 766    CPU loop after each instruction */
 767 void cpu_single_step(CPUState *cpu, int enabled)
 768 {
 769     if (cpu->singlestep_enabled != enabled) {
 770         cpu->singlestep_enabled = enabled;
 771         if (kvm_enabled()) {
 772             kvm_update_guest_debug(cpu, 0);
 773         } else {
 774             /* must flush all the translated code to avoid inconsistencies */
 775             /* XXX: only flush what is necessary */
 776             CPUArchState *env = cpu->env_ptr;
 777             tb_flush(env);
 778         }
 779     }
 780 }
 781
 782 void cpu_abort(CPUState *cpu, const char *fmt, ...)
 783 {
 784     va_list ap;
 785     va_list ap2;
 786
 787     va_start(ap, fmt);
 788     va_copy(ap2, ap);
 789     fprintf(stderr, "qemu: fatal: ");
 790     vfprintf(stderr, fmt, ap);
 791     fprintf(stderr, "\n");
 792     cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 793     if (qemu_log_enabled()) {
 794         qemu_log("qemu: fatal: ");
 795         qemu_log_vprintf(fmt, ap2);
 796         qemu_log("\n");
 797         log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 798         qemu_log_flush();
 799         qemu_log_close();
 800     }
 801     va_end(ap2);
 802     va_end(ap);
 803 #if defined(CONFIG_USER_ONLY)
 804     {
 805         struct sigaction act;
 806         sigfillset(&act.sa_mask);
 807         act.sa_handler = SIG_DFL;
 808         sigaction(SIGABRT, &act, NULL);
 809     }
 810 #endif
 811     abort();
 812 }
 813
 814 #if !defined(CONFIG_USER_ONLY)
 815 /* Called from RCU critical section */
 816 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 817 {
 818     RAMBlock *block;
 819
 820     block = atomic_rcu_read(&ram_list.mru_block);
 821     if (block && addr - block->offset < block->max_length) {
 822         goto found;
 823     }
 824     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 825         if (addr - block->offset < block->max_length) {
 826             goto found;
 827         }
 828     }
 829
 830     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
 831     abort();
 832
 833 found:
 834     /* It is safe to write mru_block outside the iothread lock.  This
 835      * is what happens:
 836      *
 837      *     mru_block = xxx
 838      *     rcu_read_unlock()
 839      *                                        xxx removed from list
 840      *                  rcu_read_lock()
 841      *                  read mru_block
 842      *                                        mru_block = NULL;
 843      *                                        call_rcu(reclaim_ramblock, xxx);
 844      *                  rcu_read_unlock()
 845      *
 846      * atomic_rcu_set is not needed here.  The block was already published
 847      * when it was placed into the list.  Here we're just making an extra
 848      * copy of the pointer.
 849      */
 850     ram_list.mru_block = block;
 851     return block;
 852 }
 853
 854 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 855 {
 856     ram_addr_t start1;
 857     RAMBlock *block;
 858     ram_addr_t end;
 859
 860     end = TARGET_PAGE_ALIGN(start + length);
 861     start &= TARGET_PAGE_MASK;
 862
 863     rcu_read_lock();
 864     block = qemu_get_ram_block(start);
 865     assert(block == qemu_get_ram_block(end - 1));
 866     start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
 867     cpu_tlb_reset_dirty_all(start1, length);
 868     rcu_read_unlock();
 869 }
 870
 871 /* Note: start and end must be within the same ram block.  */
 872 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
 873                                      unsigned client)
 874 {
 875     if (length == 0)
 876         return;
 877     cpu_physical_memory_clear_dirty_range_type(start, length, client);
 878
 879     if (tcg_enabled()) {
 880         tlb_reset_dirty_range_all(start, length);
 881     }
 882 }
 883
 884 static void cpu_physical_memory_set_dirty_tracking(bool enable)
 885 {
 886     in_migration = enable;
 887 }
 888
 889 /* Called from RCU critical section */
 890 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
 891                                        MemoryRegionSection *section,
 892                                        target_ulong vaddr,
 893                                        hwaddr paddr, hwaddr xlat,
 894                                        int prot,
 895                                        target_ulong *address)
 896 {
 897     hwaddr iotlb;
 898     CPUWatchpoint *wp;
 899
 900     if (memory_region_is_ram(section->mr)) {
 901         /* Normal RAM.  */
 902         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
 903             + xlat;
 904         if (!section->readonly) {
 905             iotlb |= PHYS_SECTION_NOTDIRTY;
 906         } else {
 907             iotlb |= PHYS_SECTION_ROM;
 908         }
 909     } else {
 910         iotlb = section - section->address_space->dispatch->map.sections;
 911         iotlb += xlat;
 912     }
 913
 914     /* Make accesses to pages with watchpoints go via the
 915        watchpoint trap routines.  */
 916     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 917         if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
 918             /* Avoid trapping reads of pages with a write breakpoint. */
 919             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
 920                 iotlb = PHYS_SECTION_WATCH + paddr;
 921                 *address |= TLB_MMIO;
 922                 break;
 923             }
 924         }
 925     }
 926
 927     return iotlb;
 928 }
 929 #endif /* defined(CONFIG_USER_ONLY) */
 930
 931 #if !defined(CONFIG_USER_ONLY)
 932
 933 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
 934                              uint16_t section);
 935 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
 936
 937 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
 938                                qemu_anon_ram_alloc;
 939
 940 /*
 941  * Set a custom physical guest memory alloator.
 942  * Accelerators with unusual needs may need this.  Hopefully, we can
 943  * get rid of it eventually.
 944  */
 945 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
 946 {
 947     phys_mem_alloc = alloc;
 948 }
 949
 950 static uint16_t phys_section_add(PhysPageMap *map,
 951                                  MemoryRegionSection *section)
 952 {
 953     /* The physical section number is ORed with a page-aligned
 954      * pointer to produce the iotlb entries.  Thus it should
 955      * never overflow into the page-aligned value.
 956      */
 957     assert(map->sections_nb < TARGET_PAGE_SIZE);
 958
 959     if (map->sections_nb == map->sections_nb_alloc) {
 960         map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
 961         map->sections = g_renew(MemoryRegionSection, map->sections,
 962                                 map->sections_nb_alloc);
 963     }
 964     map->sections[map->sections_nb] = *section;
 965     memory_region_ref(section->mr);
 966     return map->sections_nb++;
 967 }
 968
 969 static void phys_section_destroy(MemoryRegion *mr)
 970 {
 971     memory_region_unref(mr);
 972
 973     if (mr->subpage) {
 974         subpage_t *subpage = container_of(mr, subpage_t, iomem);
 975         object_unref(OBJECT(&subpage->iomem));
 976         g_free(subpage);
 977     }
 978 }
 979
 980 static void phys_sections_free(PhysPageMap *map)
 981 {
 982     while (map->sections_nb > 0) {
 983         MemoryRegionSection *section = &map->sections[--map->sections_nb];
 984         phys_section_destroy(section->mr);
 985     }
 986     g_free(map->sections);
 987     g_free(map->nodes);
 988 }
 989
 990 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
 991 {
 992     subpage_t *subpage;
 993     hwaddr base = section->offset_within_address_space
 994         & TARGET_PAGE_MASK;
 995     MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
 996                                                    d->map.nodes, d->map.sections);
 997     MemoryRegionSection subsection = {
 998         .offset_within_address_space = base,
 999         .size = int128_make64(TARGET_PAGE_SIZE),
1000     };
1001     hwaddr start, end;
1002
1003     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1004
1005     if (!(existing->mr->subpage)) {
1006         subpage = subpage_init(d->as, base);
1007         subsection.address_space = d->as;
1008         subsection.mr = &subpage->iomem;
1009         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1010                       phys_section_add(&d->map, &subsection));
1011     } else {
1012         subpage = container_of(existing->mr, subpage_t, iomem);
1013     }
1014     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1015     end = start + int128_get64(section->size) - 1;
1016     subpage_register(subpage, start, end,
1017                      phys_section_add(&d->map, section));
1018 }
1019
1020
1021 static void register_multipage(AddressSpaceDispatch *d,
1022                                MemoryRegionSection *section)
1023 {
1024     hwaddr start_addr = section->offset_within_address_space;
1025     uint16_t section_index = phys_section_add(&d->map, section);
1026     uint64_t num_pages = int128_get64(int128_rshift(section->size,
1027                                                     TARGET_PAGE_BITS));
1028
1029     assert(num_pages);
1030     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1031 }
1032
1033 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1034 {
1035     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1036     AddressSpaceDispatch *d = as->next_dispatch;
1037     MemoryRegionSection now = *section, remain = *section;
1038     Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1039
1040     if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1041         uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1042                        - now.offset_within_address_space;
1043
1044         now.size = int128_min(int128_make64(left), now.size);
1045         register_subpage(d, &now);
1046     } else {
1047         now.size = int128_zero();
1048     }
1049     while (int128_ne(remain.size, now.size)) {
1050         remain.size = int128_sub(remain.size, now.size);
1051         remain.offset_within_address_space += int128_get64(now.size);
1052         remain.offset_within_region += int128_get64(now.size);
1053         now = remain;
1054         if (int128_lt(remain.size, page_size)) {
1055             register_subpage(d, &now);
1056         } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1057             now.size = page_size;
1058             register_subpage(d, &now);
1059         } else {
1060             now.size = int128_and(now.size, int128_neg(page_size));
1061             register_multipage(d, &now);
1062         }
1063     }
1064 }
1065
1066 void qemu_flush_coalesced_mmio_buffer(void)
1067 {
1068     if (kvm_enabled())
1069         kvm_flush_coalesced_mmio_buffer();
1070 }
1071
1072 void qemu_mutex_lock_ramlist(void)
1073 {
1074     qemu_mutex_lock(&ram_list.mutex);
1075 }
1076
1077 void qemu_mutex_unlock_ramlist(void)
1078 {
1079     qemu_mutex_unlock(&ram_list.mutex);
1080 }
1081
1082 #ifdef __linux__
1083
1084 #include <sys/vfs.h>
1085
1086 #define HUGETLBFS_MAGIC       0x958458f6
1087
1088 static long gethugepagesize(const char *path, Error **errp)
1089 {
1090     struct statfs fs;
1091     int ret;
1092
1093     do {
1094         ret = statfs(path, &fs);
1095     } while (ret != 0 && errno == EINTR);
1096
1097     if (ret != 0) {
1098         error_setg_errno(errp, errno, "failed to get page size of file %s",
1099                          path);
1100         return 0;
1101     }
1102
1103     if (fs.f_type != HUGETLBFS_MAGIC)
1104         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1105
1106     return fs.f_bsize;
1107 }
1108
1109 static void *file_ram_alloc(RAMBlock *block,
1110                             ram_addr_t memory,
1111                             const char *path,
1112                             Error **errp)
1113 {
1114     char *filename;
1115     char *sanitized_name;
1116     char *c;
1117     void *area = NULL;
1118     int fd;
1119     uint64_t hpagesize;
1120     Error *local_err = NULL;
1121
1122     hpagesize = gethugepagesize(path, &local_err);
1123     if (local_err) {
1124         error_propagate(errp, local_err);
1125         goto error;
1126     }
1127     block->mr->align = hpagesize;
1128
1129     if (memory < hpagesize) {
1130         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1131                    "or larger than huge page size 0x%" PRIx64,
1132                    memory, hpagesize);
1133         goto error;
1134     }
1135
1136     if (kvm_enabled() && !kvm_has_sync_mmu()) {
1137         error_setg(errp,
1138                    "host lacks kvm mmu notifiers, -mem-path unsupported");
1139         goto error;
1140     }
1141
1142     /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1143     sanitized_name = g_strdup(memory_region_name(block->mr));
1144     for (c = sanitized_name; *c != '\0'; c++) {
1145         if (*c == '/')
1146             *c = '_';
1147     }
1148
1149     filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1150                                sanitized_name);
1151     g_free(sanitized_name);
1152
1153     fd = mkstemp(filename);
1154     if (fd < 0) {
1155         error_setg_errno(errp, errno,
1156                          "unable to create backing store for hugepages");
1157         g_free(filename);
1158         goto error;
1159     }
1160     unlink(filename);
1161     g_free(filename);
1162
1163     memory = (memory+hpagesize-1) & ~(hpagesize-1);
1164
1165     /*
1166      * ftruncate is not supported by hugetlbfs in older
1167      * hosts, so don't bother bailing out on errors.
1168      * If anything goes wrong with it under other filesystems,
1169      * mmap will fail.
1170      */
1171     if (ftruncate(fd, memory)) {
1172         perror("ftruncate");
1173     }
1174
1175     area = mmap(0, memory, PROT_READ | PROT_WRITE,
1176                 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1177                 fd, 0);
1178     if (area == MAP_FAILED) {
1179         error_setg_errno(errp, errno,
1180                          "unable to map backing store for hugepages");
1181         close(fd);
1182         goto error;
1183     }
1184
1185     if (mem_prealloc) {
1186         os_mem_prealloc(fd, area, memory);
1187     }
1188
1189     block->fd = fd;
1190     return area;
1191
1192 error:
1193     if (mem_prealloc) {
1194         error_report("%s", error_get_pretty(*errp));
1195         exit(1);
1196     }
1197     return NULL;
1198 }
1199 #endif
1200
1201 /* Called with the ramlist lock held.  */
1202 static ram_addr_t find_ram_offset(ram_addr_t size)
1203 {
1204     RAMBlock *block, *next_block;
1205     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1206
1207     assert(size != 0); /* it would hand out same offset multiple times */
1208
1209     if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1210         return 0;
1211     }
1212
1213     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1214         ram_addr_t end, next = RAM_ADDR_MAX;
1215
1216         end = block->offset + block->max_length;
1217
1218         QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1219             if (next_block->offset >= end) {
1220                 next = MIN(next, next_block->offset);
1221             }
1222         }
1223         if (next - end >= size && next - end < mingap) {
1224             offset = end;
1225             mingap = next - end;
1226         }
1227     }
1228
1229     if (offset == RAM_ADDR_MAX) {
1230         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1231                 (uint64_t)size);
1232         abort();
1233     }
1234
1235     return offset;
1236 }
1237
1238 ram_addr_t last_ram_offset(void)
1239 {
1240     RAMBlock *block;
1241     ram_addr_t last = 0;
1242
1243     rcu_read_lock();
1244     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1245         last = MAX(last, block->offset + block->max_length);
1246     }
1247     rcu_read_unlock();
1248     return last;
1249 }
1250
1251 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1252 {
1253     int ret;
1254
1255     /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1256     if (!machine_dump_guest_core(current_machine)) {
1257         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1258         if (ret) {
1259             perror("qemu_madvise");
1260             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1261                             "but dump_guest_core=off specified\n");
1262         }
1263     }
1264 }
1265
1266 /* Called within an RCU critical section, or while the ramlist lock
1267  * is held.
1268  */
1269 static RAMBlock *find_ram_block(ram_addr_t addr)
1270 {
1271     RAMBlock *block;
1272
1273     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1274         if (block->offset == addr) {
1275             return block;
1276         }
1277     }
1278
1279     return NULL;
1280 }
1281
1282 /* Called with iothread lock held.  */
1283 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1284 {
1285     RAMBlock *new_block, *block;
1286
1287     rcu_read_lock();
1288     new_block = find_ram_block(addr);
1289     assert(new_block);
1290     assert(!new_block->idstr[0]);
1291
1292     if (dev) {
1293         char *id = qdev_get_dev_path(dev);
1294         if (id) {
1295             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1296             g_free(id);
1297         }
1298     }
1299     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1300
1301     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1302         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1303             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1304                     new_block->idstr);
1305             abort();
1306         }
1307     }
1308     rcu_read_unlock();
1309 }
1310
1311 /* Called with iothread lock held.  */
1312 void qemu_ram_unset_idstr(ram_addr_t addr)
1313 {
1314     RAMBlock *block;
1315
1316     /* FIXME: arch_init.c assumes that this is not called throughout
1317      * migration.  Ignore the problem since hot-unplug during migration
1318      * does not work anyway.
1319      */
1320
1321     rcu_read_lock();
1322     block = find_ram_block(addr);
1323     if (block) {
1324         memset(block->idstr, 0, sizeof(block->idstr));
1325     }
1326     rcu_read_unlock();
1327 }
1328
1329 static int memory_try_enable_merging(void *addr, size_t len)
1330 {
1331     if (!machine_mem_merge(current_machine)) {
1332         /* disabled by the user */
1333         return 0;
1334     }
1335
1336     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1337 }
1338
1339 /* Only legal before guest might have detected the memory size: e.g. on
1340  * incoming migration, or right after reset.
1341  *
1342  * As memory core doesn't know how is memory accessed, it is up to
1343  * resize callback to update device state and/or add assertions to detect
1344  * misuse, if necessary.
1345  */
1346 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1347 {
1348     RAMBlock *block = find_ram_block(base);
1349
1350     assert(block);
1351
1352     newsize = TARGET_PAGE_ALIGN(newsize);
1353
1354     if (block->used_length == newsize) {
1355         return 0;
1356     }
1357
1358     if (!(block->flags & RAM_RESIZEABLE)) {
1359         error_setg_errno(errp, EINVAL,
1360                          "Length mismatch: %s: 0x" RAM_ADDR_FMT
1361                          " in != 0x" RAM_ADDR_FMT, block->idstr,
1362                          newsize, block->used_length);
1363         return -EINVAL;
1364     }
1365
1366     if (block->max_length < newsize) {
1367         error_setg_errno(errp, EINVAL,
1368                          "Length too large: %s: 0x" RAM_ADDR_FMT
1369                          " > 0x" RAM_ADDR_FMT, block->idstr,
1370                          newsize, block->max_length);
1371         return -EINVAL;
1372     }
1373
1374     cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1375     block->used_length = newsize;
1376     cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1377     memory_region_set_size(block->mr, newsize);
1378     if (block->resized) {
1379         block->resized(block->idstr, newsize, block->host);
1380     }
1381     return 0;
1382 }
1383
1384 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1385 {
1386     RAMBlock *block;
1387     RAMBlock *last_block = NULL;
1388     ram_addr_t old_ram_size, new_ram_size;
1389
1390     old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1391
1392     qemu_mutex_lock_ramlist();
1393     new_block->offset = find_ram_offset(new_block->max_length);
1394
1395     if (!new_block->host) {
1396         if (xen_enabled()) {
1397             xen_ram_alloc(new_block->offset, new_block->max_length,
1398                           new_block->mr);
1399         } else {
1400             new_block->host = phys_mem_alloc(new_block->max_length,
1401                                              &new_block->mr->align);
1402             if (!new_block->host) {
1403                 error_setg_errno(errp, errno,
1404                                  "cannot set up guest memory '%s'",
1405                                  memory_region_name(new_block->mr));
1406                 qemu_mutex_unlock_ramlist();
1407                 return -1;
1408             }
1409             memory_try_enable_merging(new_block->host, new_block->max_length);
1410         }
1411     }
1412
1413     /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1414      * QLIST (which has an RCU-friendly variant) does not have insertion at
1415      * tail, so save the last element in last_block.
1416      */
1417     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1418         last_block = block;
1419         if (block->max_length < new_block->max_length) {
1420             break;
1421         }
1422     }
1423     if (block) {
1424         QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1425     } else if (last_block) {
1426         QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1427     } else { /* list is empty */
1428         QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1429     }
1430     ram_list.mru_block = NULL;
1431
1432     /* Write list before version */
1433     smp_wmb();
1434     ram_list.version++;
1435     qemu_mutex_unlock_ramlist();
1436
1437     new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1438
1439     if (new_ram_size > old_ram_size) {
1440         int i;
1441
1442         /* ram_list.dirty_memory[] is protected by the iothread lock.  */
1443         for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1444             ram_list.dirty_memory[i] =
1445                 bitmap_zero_extend(ram_list.dirty_memory[i],
1446                                    old_ram_size, new_ram_size);
1447        }
1448     }
1449     cpu_physical_memory_set_dirty_range(new_block->offset,
1450                                         new_block->used_length);
1451
1452     if (new_block->host) {
1453         qemu_ram_setup_dump(new_block->host, new_block->max_length);
1454         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1455         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1456         if (kvm_enabled()) {
1457             kvm_setup_guest_memory(new_block->host, new_block->max_length);
1458         }
1459     }
1460
1461     return new_block->offset;
1462 }
1463
1464 #ifdef __linux__
1465 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1466                                     bool share, const char *mem_path,
1467                                     Error **errp)
1468 {
1469     RAMBlock *new_block;
1470     ram_addr_t addr;
1471     Error *local_err = NULL;
1472
1473     if (xen_enabled()) {
1474         error_setg(errp, "-mem-path not supported with Xen");
1475         return -1;
1476     }
1477
1478     if (phys_mem_alloc != qemu_anon_ram_alloc) {
1479         /*
1480          * file_ram_alloc() needs to allocate just like
1481          * phys_mem_alloc, but we haven't bothered to provide
1482          * a hook there.
1483          */
1484         error_setg(errp,
1485                    "-mem-path not supported with this accelerator");
1486         return -1;
1487     }
1488
1489     size = TARGET_PAGE_ALIGN(size);
1490     new_block = g_malloc0(sizeof(*new_block));
1491     new_block->mr = mr;
1492     new_block->used_length = size;
1493     new_block->max_length = size;
1494     new_block->flags = share ? RAM_SHARED : 0;
1495     new_block->host = file_ram_alloc(new_block, size,
1496                                      mem_path, errp);
1497     if (!new_block->host) {
1498         g_free(new_block);
1499         return -1;
1500     }
1501
1502     addr = ram_block_add(new_block, &local_err);
1503     if (local_err) {
1504         g_free(new_block);
1505         error_propagate(errp, local_err);
1506         return -1;
1507     }
1508     return addr;
1509 }
1510 #endif
1511
1512 static
1513 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1514                                    void (*resized)(const char*,
1515                                                    uint64_t length,
1516                                                    void *host),
1517                                    void *host, bool resizeable,
1518                                    MemoryRegion *mr, Error **errp)
1519 {
1520     RAMBlock *new_block;
1521     ram_addr_t addr;
1522     Error *local_err = NULL;
1523
1524     size = TARGET_PAGE_ALIGN(size);
1525     max_size = TARGET_PAGE_ALIGN(max_size);
1526     new_block = g_malloc0(sizeof(*new_block));
1527     new_block->mr = mr;
1528     new_block->resized = resized;
1529     new_block->used_length = size;
1530     new_block->max_length = max_size;
1531     assert(max_size >= size);
1532     new_block->fd = -1;
1533     new_block->host = host;
1534     if (host) {
1535         new_block->flags |= RAM_PREALLOC;
1536     }
1537     if (resizeable) {
1538         new_block->flags |= RAM_RESIZEABLE;
1539     }
1540     addr = ram_block_add(new_block, &local_err);
1541     if (local_err) {
1542         g_free(new_block);
1543         error_propagate(errp, local_err);
1544         return -1;
1545     }
1546     return addr;
1547 }
1548
1549 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1550                                    MemoryRegion *mr, Error **errp)
1551 {
1552     return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1553 }
1554
1555 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1556 {
1557     return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1558 }
1559
1560 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1561                                      void (*resized)(const char*,
1562                                                      uint64_t length,
1563                                                      void *host),
1564                                      MemoryRegion *mr, Error **errp)
1565 {
1566     return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1567 }
1568
1569 void qemu_ram_free_from_ptr(ram_addr_t addr)
1570 {
1571     RAMBlock *block;
1572
1573     qemu_mutex_lock_ramlist();
1574     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1575         if (addr == block->offset) {
1576             QLIST_REMOVE_RCU(block, next);
1577             ram_list.mru_block = NULL;
1578             /* Write list before version */
1579             smp_wmb();
1580             ram_list.version++;
1581             g_free_rcu(block, rcu);
1582             break;
1583         }
1584     }
1585     qemu_mutex_unlock_ramlist();
1586 }
1587
1588 static void reclaim_ramblock(RAMBlock *block)
1589 {
1590     if (block->flags & RAM_PREALLOC) {
1591         ;
1592     } else if (xen_enabled()) {
1593         xen_invalidate_map_cache_entry(block->host);
1594 #ifndef _WIN32
1595     } else if (block->fd >= 0) {
1596         munmap(block->host, block->max_length);
1597         close(block->fd);
1598 #endif
1599     } else {
1600         qemu_anon_ram_free(block->host, block->max_length);
1601     }
1602     g_free(block);
1603 }
1604
1605 void qemu_ram_free(ram_addr_t addr)
1606 {
1607     RAMBlock *block;
1608
1609     qemu_mutex_lock_ramlist();
1610     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1611         if (addr == block->offset) {
1612             QLIST_REMOVE_RCU(block, next);
1613             ram_list.mru_block = NULL;
1614             /* Write list before version */
1615             smp_wmb();
1616             ram_list.version++;
1617             call_rcu(block, reclaim_ramblock, rcu);
1618             break;
1619         }
1620     }
1621     qemu_mutex_unlock_ramlist();
1622 }
1623
1624 #ifndef _WIN32
1625 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1626 {
1627     RAMBlock *block;
1628     ram_addr_t offset;
1629     int flags;
1630     void *area, *vaddr;
1631
1632     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1633         offset = addr - block->offset;
1634         if (offset < block->max_length) {
1635             vaddr = ramblock_ptr(block, offset);
1636             if (block->flags & RAM_PREALLOC) {
1637                 ;
1638             } else if (xen_enabled()) {
1639                 abort();
1640             } else {
1641                 flags = MAP_FIXED;
1642                 munmap(vaddr, length);
1643                 if (block->fd >= 0) {
1644                     flags |= (block->flags & RAM_SHARED ?
1645                               MAP_SHARED : MAP_PRIVATE);
1646                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1647                                 flags, block->fd, offset);
1648                 } else {
1649                     /*
1650                      * Remap needs to match alloc.  Accelerators that
1651                      * set phys_mem_alloc never remap.  If they did,
1652                      * we'd need a remap hook here.
1653                      */
1654                     assert(phys_mem_alloc == qemu_anon_ram_alloc);
1655
1656                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1657                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1658                                 flags, -1, 0);
1659                 }
1660                 if (area != vaddr) {
1661                     fprintf(stderr, "Could not remap addr: "
1662                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1663                             length, addr);
1664                     exit(1);
1665                 }
1666                 memory_try_enable_merging(vaddr, length);
1667                 qemu_ram_setup_dump(vaddr, length);
1668             }
1669         }
1670     }
1671 }
1672 #endif /* !_WIN32 */
1673
1674 int qemu_get_ram_fd(ram_addr_t addr)
1675 {
1676     RAMBlock *block;
1677     int fd;
1678
1679     rcu_read_lock();
1680     block = qemu_get_ram_block(addr);
1681     fd = block->fd;
1682     rcu_read_unlock();
1683     return fd;
1684 }
1685
1686 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1687 {
1688     RAMBlock *block;
1689     void *ptr;
1690
1691     rcu_read_lock();
1692     block = qemu_get_ram_block(addr);
1693     ptr = ramblock_ptr(block, 0);
1694     rcu_read_unlock();
1695     return ptr;
1696 }
1697
1698 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1699  * This should not be used for general purpose DMA.  Use address_space_map
1700  * or address_space_rw instead. For local memory (e.g. video ram) that the
1701  * device owns, use memory_region_get_ram_ptr.
1702  *
1703  * By the time this function returns, the returned pointer is not protected
1704  * by RCU anymore.  If the caller is not within an RCU critical section and
1705  * does not hold the iothread lock, it must have other means of protecting the
1706  * pointer, such as a reference to the region that includes the incoming
1707  * ram_addr_t.
1708  */
1709 void *qemu_get_ram_ptr(ram_addr_t addr)
1710 {
1711     RAMBlock *block;
1712     void *ptr;
1713
1714     rcu_read_lock();
1715     block = qemu_get_ram_block(addr);
1716
1717     if (xen_enabled() && block->host == NULL) {
1718         /* We need to check if the requested address is in the RAM
1719          * because we don't want to map the entire memory in QEMU.
1720          * In that case just map until the end of the page.
1721          */
1722         if (block->offset == 0) {
1723             ptr = xen_map_cache(addr, 0, 0);
1724             goto unlock;
1725         }
1726
1727         block->host = xen_map_cache(block->offset, block->max_length, 1);
1728     }
1729     ptr = ramblock_ptr(block, addr - block->offset);
1730
1731 unlock:
1732     rcu_read_unlock();
1733     return ptr;
1734 }
1735
1736 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1737  * but takes a size argument.
1738  *
1739  * By the time this function returns, the returned pointer is not protected
1740  * by RCU anymore.  If the caller is not within an RCU critical section and
1741  * does not hold the iothread lock, it must have other means of protecting the
1742  * pointer, such as a reference to the region that includes the incoming
1743  * ram_addr_t.
1744  */
1745 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1746 {
1747     void *ptr;
1748     if (*size == 0) {
1749         return NULL;
1750     }
1751     if (xen_enabled()) {
1752         return xen_map_cache(addr, *size, 1);
1753     } else {
1754         RAMBlock *block;
1755         rcu_read_lock();
1756         QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1757             if (addr - block->offset < block->max_length) {
1758                 if (addr - block->offset + *size > block->max_length)
1759                     *size = block->max_length - addr + block->offset;
1760                 ptr = ramblock_ptr(block, addr - block->offset);
1761                 rcu_read_unlock();
1762                 return ptr;
1763             }
1764         }
1765
1766         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1767         abort();
1768     }
1769 }
1770
1771 /* Some of the softmmu routines need to translate from a host pointer
1772  * (typically a TLB entry) back to a ram offset.
1773  *
1774  * By the time this function returns, the returned pointer is not protected
1775  * by RCU anymore.  If the caller is not within an RCU critical section and
1776  * does not hold the iothread lock, it must have other means of protecting the
1777  * pointer, such as a reference to the region that includes the incoming
1778  * ram_addr_t.
1779  */
1780 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1781 {
1782     RAMBlock *block;
1783     uint8_t *host = ptr;
1784     MemoryRegion *mr;
1785
1786     if (xen_enabled()) {
1787         rcu_read_lock();
1788         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1789         mr = qemu_get_ram_block(*ram_addr)->mr;
1790         rcu_read_unlock();
1791         return mr;
1792     }
1793
1794     rcu_read_lock();
1795     block = atomic_rcu_read(&ram_list.mru_block);
1796     if (block && block->host && host - block->host < block->max_length) {
1797         goto found;
1798     }
1799
1800     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1801         /* This case append when the block is not mapped. */
1802         if (block->host == NULL) {
1803             continue;
1804         }
1805         if (host - block->host < block->max_length) {
1806             goto found;
1807         }
1808     }
1809
1810     rcu_read_unlock();
1811     return NULL;
1812
1813 found:
1814     *ram_addr = block->offset + (host - block->host);
1815     mr = block->mr;
1816     rcu_read_unlock();
1817     return mr;
1818 }
1819
1820 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1821                                uint64_t val, unsigned size)
1822 {
1823     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1824         tb_invalidate_phys_page_fast(ram_addr, size);
1825     }
1826     switch (size) {
1827     case 1:
1828         stb_p(qemu_get_ram_ptr(ram_addr), val);
1829         break;
1830     case 2:
1831         stw_p(qemu_get_ram_ptr(ram_addr), val);
1832         break;
1833     case 4:
1834         stl_p(qemu_get_ram_ptr(ram_addr), val);
1835         break;
1836     default:
1837         abort();
1838     }
1839     cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1840     /* we remove the notdirty callback only if the code has been
1841        flushed */
1842     if (!cpu_physical_memory_is_clean(ram_addr)) {
1843         CPUArchState *env = current_cpu->env_ptr;
1844         tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1845     }
1846 }
1847
1848 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1849                                  unsigned size, bool is_write)
1850 {
1851     return is_write;
1852 }
1853
1854 static const MemoryRegionOps notdirty_mem_ops = {
1855     .write = notdirty_mem_write,
1856     .valid.accepts = notdirty_mem_accepts,
1857     .endianness = DEVICE_NATIVE_ENDIAN,
1858 };
1859
1860 /* Generate a debug exception if a watchpoint has been hit.  */
1861 static void check_watchpoint(int offset, int len, int flags)
1862 {
1863     CPUState *cpu = current_cpu;
1864     CPUArchState *env = cpu->env_ptr;
1865     target_ulong pc, cs_base;
1866     target_ulong vaddr;
1867     CPUWatchpoint *wp;
1868     int cpu_flags;
1869
1870     if (cpu->watchpoint_hit) {
1871         /* We re-entered the check after replacing the TB. Now raise
1872          * the debug interrupt so that is will trigger after the
1873          * current instruction. */
1874         cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1875         return;
1876     }
1877     vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1878     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1879         if (cpu_watchpoint_address_matches(wp, vaddr, len)
1880             && (wp->flags & flags)) {
1881             if (flags == BP_MEM_READ) {
1882                 wp->flags |= BP_WATCHPOINT_HIT_READ;
1883             } else {
1884                 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1885             }
1886             wp->hitaddr = vaddr;
1887             if (!cpu->watchpoint_hit) {
1888                 cpu->watchpoint_hit = wp;
1889                 tb_check_watchpoint(cpu);
1890                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1891                     cpu->exception_index = EXCP_DEBUG;
1892                     cpu_loop_exit(cpu);
1893                 } else {
1894                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1895                     tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1896                     cpu_resume_from_signal(cpu, NULL);
1897                 }
1898             }
1899         } else {
1900             wp->flags &= ~BP_WATCHPOINT_HIT;
1901         }
1902     }
1903 }
1904
1905 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1906    so these check for a hit then pass through to the normal out-of-line
1907    phys routines.  */
1908 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1909                                unsigned size)
1910 {
1911     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1912     switch (size) {
1913     case 1: return ldub_phys(&address_space_memory, addr);
1914     case 2: return lduw_phys(&address_space_memory, addr);
1915     case 4: return ldl_phys(&address_space_memory, addr);
1916     default: abort();
1917     }
1918 }
1919
1920 static void watch_mem_write(void *opaque, hwaddr addr,
1921                             uint64_t val, unsigned size)
1922 {
1923     check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1924     switch (size) {
1925     case 1:
1926         stb_phys(&address_space_memory, addr, val);
1927         break;
1928     case 2:
1929         stw_phys(&address_space_memory, addr, val);
1930         break;
1931     case 4:
1932         stl_phys(&address_space_memory, addr, val);
1933         break;
1934     default: abort();
1935     }
1936 }
1937
1938 static const MemoryRegionOps watch_mem_ops = {
1939     .read = watch_mem_read,
1940     .write = watch_mem_write,
1941     .endianness = DEVICE_NATIVE_ENDIAN,
1942 };
1943
1944 static uint64_t subpage_read(void *opaque, hwaddr addr,
1945                              unsigned len)
1946 {
1947     subpage_t *subpage = opaque;
1948     uint8_t buf[8];
1949
1950 #if defined(DEBUG_SUBPAGE)
1951     printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1952            subpage, len, addr);
1953 #endif
1954     address_space_read(subpage->as, addr + subpage->base, buf, len);
1955     switch (len) {
1956     case 1:
1957         return ldub_p(buf);
1958     case 2:
1959         return lduw_p(buf);
1960     case 4:
1961         return ldl_p(buf);
1962     case 8:
1963         return ldq_p(buf);
1964     default:
1965         abort();
1966     }
1967 }
1968
1969 static void subpage_write(void *opaque, hwaddr addr,
1970                           uint64_t value, unsigned len)
1971 {
1972     subpage_t *subpage = opaque;
1973     uint8_t buf[8];
1974
1975 #if defined(DEBUG_SUBPAGE)
1976     printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1977            " value %"PRIx64"\n",
1978            __func__, subpage, len, addr, value);
1979 #endif
1980     switch (len) {
1981     case 1:
1982         stb_p(buf, value);
1983         break;
1984     case 2:
1985         stw_p(buf, value);
1986         break;
1987     case 4:
1988         stl_p(buf, value);
1989         break;
1990     case 8:
1991         stq_p(buf, value);
1992         break;
1993     default:
1994         abort();
1995     }
1996     address_space_write(subpage->as, addr + subpage->base, buf, len);
1997 }
1998
1999 static bool subpage_accepts(void *opaque, hwaddr addr,
2000                             unsigned len, bool is_write)
2001 {
2002     subpage_t *subpage = opaque;
2003 #if defined(DEBUG_SUBPAGE)
2004     printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2005            __func__, subpage, is_write ? 'w' : 'r', len, addr);
2006 #endif
2007
2008     return address_space_access_valid(subpage->as, addr + subpage->base,
2009                                       len, is_write);
2010 }
2011
2012 static const MemoryRegionOps subpage_ops = {
2013     .read = subpage_read,
2014     .write = subpage_write,
2015     .impl.min_access_size = 1,
2016     .impl.max_access_size = 8,
2017     .valid.min_access_size = 1,
2018     .valid.max_access_size = 8,
2019     .valid.accepts = subpage_accepts,
2020     .endianness = DEVICE_NATIVE_ENDIAN,
2021 };
2022
2023 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2024                              uint16_t section)
2025 {
2026     int idx, eidx;
2027
2028     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2029         return -1;
2030     idx = SUBPAGE_IDX(start);
2031     eidx = SUBPAGE_IDX(end);
2032 #if defined(DEBUG_SUBPAGE)
2033     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2034            __func__, mmio, start, end, idx, eidx, section);
2035 #endif
2036     for (; idx <= eidx; idx++) {
2037         mmio->sub_section[idx] = section;
2038     }
2039
2040     return 0;
2041 }
2042
2043 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2044 {
2045     subpage_t *mmio;
2046
2047     mmio = g_malloc0(sizeof(subpage_t));
2048
2049     mmio->as = as;
2050     mmio->base = base;
2051     memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2052                           NULL, TARGET_PAGE_SIZE);
2053     mmio->iomem.subpage = true;
2054 #if defined(DEBUG_SUBPAGE)
2055     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2056            mmio, base, TARGET_PAGE_SIZE);
2057 #endif
2058     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2059
2060     return mmio;
2061 }
2062
2063 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2064                               MemoryRegion *mr)
2065 {
2066     assert(as);
2067     MemoryRegionSection section = {
2068         .address_space = as,
2069         .mr = mr,
2070         .offset_within_address_space = 0,
2071         .offset_within_region = 0,
2072         .size = int128_2_64(),
2073     };
2074
2075     return phys_section_add(map, &section);
2076 }
2077
2078 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2079 {
2080     AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2081     MemoryRegionSection *sections = d->map.sections;
2082
2083     return sections[index & ~TARGET_PAGE_MASK].mr;
2084 }
2085
2086 static void io_mem_init(void)
2087 {
2088     memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2089     memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2090                           NULL, UINT64_MAX);
2091     memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2092                           NULL, UINT64_MAX);
2093     memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2094                           NULL, UINT64_MAX);
2095 }
2096
2097 static void mem_begin(MemoryListener *listener)
2098 {
2099     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2100     AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2101     uint16_t n;
2102
2103     n = dummy_section(&d->map, as, &io_mem_unassigned);
2104     assert(n == PHYS_SECTION_UNASSIGNED);
2105     n = dummy_section(&d->map, as, &io_mem_notdirty);
2106     assert(n == PHYS_SECTION_NOTDIRTY);
2107     n = dummy_section(&d->map, as, &io_mem_rom);
2108     assert(n == PHYS_SECTION_ROM);
2109     n = dummy_section(&d->map, as, &io_mem_watch);
2110     assert(n == PHYS_SECTION_WATCH);
2111
2112     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2113     d->as = as;
2114     as->next_dispatch = d;
2115 }
2116
2117 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2118 {
2119     phys_sections_free(&d->map);
2120     g_free(d);
2121 }
2122
2123 static void mem_commit(MemoryListener *listener)
2124 {
2125     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2126     AddressSpaceDispatch *cur = as->dispatch;
2127     AddressSpaceDispatch *next = as->next_dispatch;
2128
2129     phys_page_compact_all(next, next->map.nodes_nb);
2130
2131     atomic_rcu_set(&as->dispatch, next);
2132     if (cur) {
2133         call_rcu(cur, address_space_dispatch_free, rcu);
2134     }
2135 }
2136
2137 static void tcg_commit(MemoryListener *listener)
2138 {
2139     CPUState *cpu;
2140
2141     /* since each CPU stores ram addresses in its TLB cache, we must
2142        reset the modified entries */
2143     /* XXX: slow ! */
2144     CPU_FOREACH(cpu) {
2145         /* FIXME: Disentangle the cpu.h circular files deps so we can
2146            directly get the right CPU from listener.  */
2147         if (cpu->tcg_as_listener != listener) {
2148             continue;
2149         }
2150         cpu_reload_memory_map(cpu);
2151     }
2152 }
2153
2154 static void core_log_global_start(MemoryListener *listener)
2155 {
2156     cpu_physical_memory_set_dirty_tracking(true);
2157 }
2158
2159 static void core_log_global_stop(MemoryListener *listener)
2160 {
2161     cpu_physical_memory_set_dirty_tracking(false);
2162 }
2163
2164 static MemoryListener core_memory_listener = {
2165     .log_global_start = core_log_global_start,
2166     .log_global_stop = core_log_global_stop,
2167     .priority = 1,
2168 };
2169
2170 void address_space_init_dispatch(AddressSpace *as)
2171 {
2172     as->dispatch = NULL;
2173     as->dispatch_listener = (MemoryListener) {
2174         .begin = mem_begin,
2175         .commit = mem_commit,
2176         .region_add = mem_add,
2177         .region_nop = mem_add,
2178         .priority = 0,
2179     };
2180     memory_listener_register(&as->dispatch_listener, as);
2181 }
2182
2183 void address_space_unregister(AddressSpace *as)
2184 {
2185     memory_listener_unregister(&as->dispatch_listener);
2186 }
2187
2188 void address_space_destroy_dispatch(AddressSpace *as)
2189 {
2190     AddressSpaceDispatch *d = as->dispatch;
2191
2192     atomic_rcu_set(&as->dispatch, NULL);
2193     if (d) {
2194         call_rcu(d, address_space_dispatch_free, rcu);
2195     }
2196 }
2197
2198 static void memory_map_init(void)
2199 {
2200     system_memory = g_malloc(sizeof(*system_memory));
2201
2202     memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2203     address_space_init(&address_space_memory, system_memory, "memory");
2204
2205     system_io = g_malloc(sizeof(*system_io));
2206     memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2207                           65536);
2208     address_space_init(&address_space_io, system_io, "I/O");
2209
2210     memory_listener_register(&core_memory_listener, &address_space_memory);
2211 }
2212
2213 MemoryRegion *get_system_memory(void)
2214 {
2215     return system_memory;
2216 }
2217
2218 MemoryRegion *get_system_io(void)
2219 {
2220     return system_io;
2221 }
2222
2223 #endif /* !defined(CONFIG_USER_ONLY) */
2224
2225 /* physical memory access (slow version, mainly for debug) */
2226 #if defined(CONFIG_USER_ONLY)
2227 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2228                         uint8_t *buf, int len, int is_write)
2229 {
2230     int l, flags;
2231     target_ulong page;
2232     void * p;
2233
2234     while (len > 0) {
2235         page = addr & TARGET_PAGE_MASK;
2236         l = (page + TARGET_PAGE_SIZE) - addr;
2237         if (l > len)
2238             l = len;
2239         flags = page_get_flags(page);
2240         if (!(flags & PAGE_VALID))
2241             return -1;
2242         if (is_write) {
2243             if (!(flags & PAGE_WRITE))
2244                 return -1;
2245             /* XXX: this code should not depend on lock_user */
2246             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2247                 return -1;
2248             memcpy(p, buf, l);
2249             unlock_user(p, addr, l);
2250         } else {
2251             if (!(flags & PAGE_READ))
2252                 return -1;
2253             /* XXX: this code should not depend on lock_user */
2254             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2255                 return -1;
2256             memcpy(buf, p, l);
2257             unlock_user(p, addr, 0);
2258         }
2259         len -= l;
2260         buf += l;
2261         addr += l;
2262     }
2263     return 0;
2264 }
2265
2266 #else
2267
2268 static void invalidate_and_set_dirty(hwaddr addr,
2269                                      hwaddr length)
2270 {
2271     if (cpu_physical_memory_range_includes_clean(addr, length)) {
2272         tb_invalidate_phys_range(addr, addr + length, 0);
2273         cpu_physical_memory_set_dirty_range_nocode(addr, length);
2274     }
2275     xen_modified_memory(addr, length);
2276 }
2277
2278 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2279 {
2280     unsigned access_size_max = mr->ops->valid.max_access_size;
2281
2282     /* Regions are assumed to support 1-4 byte accesses unless
2283        otherwise specified.  */
2284     if (access_size_max == 0) {
2285         access_size_max = 4;
2286     }
2287
2288     /* Bound the maximum access by the alignment of the address.  */
2289     if (!mr->ops->impl.unaligned) {
2290         unsigned align_size_max = addr & -addr;
2291         if (align_size_max != 0 && align_size_max < access_size_max) {
2292             access_size_max = align_size_max;
2293         }
2294     }
2295
2296     /* Don't attempt accesses larger than the maximum.  */
2297     if (l > access_size_max) {
2298         l = access_size_max;
2299     }
2300     if (l & (l - 1)) {
2301         l = 1 << (qemu_fls(l) - 1);
2302     }
2303
2304     return l;
2305 }
2306
2307 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2308                       int len, bool is_write)
2309 {
2310     hwaddr l;
2311     uint8_t *ptr;
2312     uint64_t val;
2313     hwaddr addr1;
2314     MemoryRegion *mr;
2315     bool error = false;
2316
2317     while (len > 0) {
2318         l = len;
2319         mr = address_space_translate(as, addr, &addr1, &l, is_write);
2320
2321         if (is_write) {
2322             if (!memory_access_is_direct(mr, is_write)) {
2323                 l = memory_access_size(mr, l, addr1);
2324                 /* XXX: could force current_cpu to NULL to avoid
2325                    potential bugs */
2326                 switch (l) {
2327                 case 8:
2328                     /* 64 bit write access */
2329                     val = ldq_p(buf);
2330                     error |= io_mem_write(mr, addr1, val, 8);
2331                     break;
2332                 case 4:
2333                     /* 32 bit write access */
2334                     val = ldl_p(buf);
2335                     error |= io_mem_write(mr, addr1, val, 4);
2336                     break;
2337                 case 2:
2338                     /* 16 bit write access */
2339                     val = lduw_p(buf);
2340                     error |= io_mem_write(mr, addr1, val, 2);
2341                     break;
2342                 case 1:
2343                     /* 8 bit write access */
2344                     val = ldub_p(buf);
2345                     error |= io_mem_write(mr, addr1, val, 1);
2346                     break;
2347                 default:
2348                     abort();
2349                 }
2350             } else {
2351                 addr1 += memory_region_get_ram_addr(mr);
2352                 /* RAM case */
2353                 ptr = qemu_get_ram_ptr(addr1);
2354                 memcpy(ptr, buf, l);
2355                 invalidate_and_set_dirty(addr1, l);
2356             }
2357         } else {
2358             if (!memory_access_is_direct(mr, is_write)) {
2359                 /* I/O case */
2360                 l = memory_access_size(mr, l, addr1);
2361                 switch (l) {
2362                 case 8:
2363                     /* 64 bit read access */
2364                     error |= io_mem_read(mr, addr1, &val, 8);
2365                     stq_p(buf, val);
2366                     break;
2367                 case 4:
2368                     /* 32 bit read access */
2369                     error |= io_mem_read(mr, addr1, &val, 4);
2370                     stl_p(buf, val);
2371                     break;
2372                 case 2:
2373                     /* 16 bit read access */
2374                     error |= io_mem_read(mr, addr1, &val, 2);
2375                     stw_p(buf, val);
2376                     break;
2377                 case 1:
2378                     /* 8 bit read access */
2379                     error |= io_mem_read(mr, addr1, &val, 1);
2380                     stb_p(buf, val);
2381                     break;
2382                 default:
2383                     abort();
2384                 }
2385             } else {
2386                 /* RAM case */
2387                 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2388                 memcpy(buf, ptr, l);
2389             }
2390         }
2391         len -= l;
2392         buf += l;
2393         addr += l;
2394     }
2395
2396     return error;
2397 }
2398
2399 bool address_space_write(AddressSpace *as, hwaddr addr,
2400                          const uint8_t *buf, int len)
2401 {
2402     return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2403 }
2404
2405 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2406 {
2407     return address_space_rw(as, addr, buf, len, false);
2408 }
2409
2410
2411 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2412                             int len, int is_write)
2413 {
2414     address_space_rw(&address_space_memory, addr, buf, len, is_write);
2415 }
2416
2417 enum write_rom_type {
2418     WRITE_DATA,
2419     FLUSH_CACHE,
2420 };
2421
2422 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2423     hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2424 {
2425     hwaddr l;
2426     uint8_t *ptr;
2427     hwaddr addr1;
2428     MemoryRegion *mr;
2429
2430     while (len > 0) {
2431         l = len;
2432         mr = address_space_translate(as, addr, &addr1, &l, true);
2433
2434         if (!(memory_region_is_ram(mr) ||
2435               memory_region_is_romd(mr))) {
2436             /* do nothing */
2437         } else {
2438             addr1 += memory_region_get_ram_addr(mr);
2439             /* ROM/RAM case */
2440             ptr = qemu_get_ram_ptr(addr1);
2441             switch (type) {
2442             case WRITE_DATA:
2443                 memcpy(ptr, buf, l);
2444                 invalidate_and_set_dirty(addr1, l);
2445                 break;
2446             case FLUSH_CACHE:
2447                 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2448                 break;
2449             }
2450         }
2451         len -= l;
2452         buf += l;
2453         addr += l;
2454     }
2455 }
2456
2457 /* used for ROM loading : can write in RAM and ROM */
2458 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2459                                    const uint8_t *buf, int len)
2460 {
2461     cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2462 }
2463
2464 void cpu_flush_icache_range(hwaddr start, int len)
2465 {
2466     /*
2467      * This function should do the same thing as an icache flush that was
2468      * triggered from within the guest. For TCG we are always cache coherent,
2469      * so there is no need to flush anything. For KVM / Xen we need to flush
2470      * the host's instruction cache at least.
2471      */
2472     if (tcg_enabled()) {
2473         return;
2474     }
2475
2476     cpu_physical_memory_write_rom_internal(&address_space_memory,
2477                                            start, NULL, len, FLUSH_CACHE);
2478 }
2479
2480 typedef struct {
2481     MemoryRegion *mr;
2482     void *buffer;
2483     hwaddr addr;
2484     hwaddr len;
2485 } BounceBuffer;
2486
2487 static BounceBuffer bounce;
2488
2489 typedef struct MapClient {
2490     void *opaque;
2491     void (*callback)(void *opaque);
2492     QLIST_ENTRY(MapClient) link;
2493 } MapClient;
2494
2495 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2496     = QLIST_HEAD_INITIALIZER(map_client_list);
2497
2498 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2499 {
2500     MapClient *client = g_malloc(sizeof(*client));
2501
2502     client->opaque = opaque;
2503     client->callback = callback;
2504     QLIST_INSERT_HEAD(&map_client_list, client, link);
2505     return client;
2506 }
2507
2508 static void cpu_unregister_map_client(void *_client)
2509 {
2510     MapClient *client = (MapClient *)_client;
2511
2512     QLIST_REMOVE(client, link);
2513     g_free(client);
2514 }
2515
2516 static void cpu_notify_map_clients(void)
2517 {
2518     MapClient *client;
2519
2520     while (!QLIST_EMPTY(&map_client_list)) {
2521         client = QLIST_FIRST(&map_client_list);
2522         client->callback(client->opaque);
2523         cpu_unregister_map_client(client);
2524     }
2525 }
2526
2527 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2528 {
2529     MemoryRegion *mr;
2530     hwaddr l, xlat;
2531
2532     while (len > 0) {
2533         l = len;
2534         mr = address_space_translate(as, addr, &xlat, &l, is_write);
2535         if (!memory_access_is_direct(mr, is_write)) {
2536             l = memory_access_size(mr, l, addr);
2537             if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2538                 return false;
2539             }
2540         }
2541
2542         len -= l;
2543         addr += l;
2544     }
2545     return true;
2546 }
2547
2548 /* Map a physical memory region into a host virtual address.
2549  * May map a subset of the requested range, given by and returned in *plen.
2550  * May return NULL if resources needed to perform the mapping are exhausted.
2551  * Use only for reads OR writes - not for read-modify-write operations.
2552  * Use cpu_register_map_client() to know when retrying the map operation is
2553  * likely to succeed.
2554  */
2555 void *address_space_map(AddressSpace *as,
2556                         hwaddr addr,
2557                         hwaddr *plen,
2558                         bool is_write)
2559 {
2560     hwaddr len = *plen;
2561     hwaddr done = 0;
2562     hwaddr l, xlat, base;
2563     MemoryRegion *mr, *this_mr;
2564     ram_addr_t raddr;
2565
2566     if (len == 0) {
2567         return NULL;
2568     }
2569
2570     l = len;
2571     mr = address_space_translate(as, addr, &xlat, &l, is_write);
2572     if (!memory_access_is_direct(mr, is_write)) {
2573         if (bounce.buffer) {
2574             return NULL;
2575         }
2576         /* Avoid unbounded allocations */
2577         l = MIN(l, TARGET_PAGE_SIZE);
2578         bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2579         bounce.addr = addr;
2580         bounce.len = l;
2581
2582         memory_region_ref(mr);
2583         bounce.mr = mr;
2584         if (!is_write) {
2585             address_space_read(as, addr, bounce.buffer, l);
2586         }
2587
2588         *plen = l;
2589         return bounce.buffer;
2590     }
2591
2592     base = xlat;
2593     raddr = memory_region_get_ram_addr(mr);
2594
2595     for (;;) {
2596         len -= l;
2597         addr += l;
2598         done += l;
2599         if (len == 0) {
2600             break;
2601         }
2602
2603         l = len;
2604         this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2605         if (this_mr != mr || xlat != base + done) {
2606             break;
2607         }
2608     }
2609
2610     memory_region_ref(mr);
2611     *plen = done;
2612     return qemu_ram_ptr_length(raddr + base, plen);
2613 }
2614
2615 /* Unmaps a memory region previously mapped by address_space_map().
2616  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2617  * the amount of memory that was actually read or written by the caller.
2618  */
2619 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2620                          int is_write, hwaddr access_len)
2621 {
2622     if (buffer != bounce.buffer) {
2623         MemoryRegion *mr;
2624         ram_addr_t addr1;
2625
2626         mr = qemu_ram_addr_from_host(buffer, &addr1);
2627         assert(mr != NULL);
2628         if (is_write) {
2629             invalidate_and_set_dirty(addr1, access_len);
2630         }
2631         if (xen_enabled()) {
2632             xen_invalidate_map_cache_entry(buffer);
2633         }
2634         memory_region_unref(mr);
2635         return;
2636     }
2637     if (is_write) {
2638         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2639     }
2640     qemu_vfree(bounce.buffer);
2641     bounce.buffer = NULL;
2642     memory_region_unref(bounce.mr);
2643     cpu_notify_map_clients();
2644 }
2645
2646 void *cpu_physical_memory_map(hwaddr addr,
2647                               hwaddr *plen,
2648                               int is_write)
2649 {
2650     return address_space_map(&address_space_memory, addr, plen, is_write);
2651 }
2652
2653 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2654                                int is_write, hwaddr access_len)
2655 {
2656     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2657 }
2658
2659 /* warning: addr must be aligned */
2660 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2661                                          enum device_endian endian)
2662 {
2663     uint8_t *ptr;
2664     uint64_t val;
2665     MemoryRegion *mr;
2666     hwaddr l = 4;
2667     hwaddr addr1;
2668
2669     mr = address_space_translate(as, addr, &addr1, &l, false);
2670     if (l < 4 || !memory_access_is_direct(mr, false)) {
2671         /* I/O case */
2672         io_mem_read(mr, addr1, &val, 4);
2673 #if defined(TARGET_WORDS_BIGENDIAN)
2674         if (endian == DEVICE_LITTLE_ENDIAN) {
2675             val = bswap32(val);
2676         }
2677 #else
2678         if (endian == DEVICE_BIG_ENDIAN) {
2679             val = bswap32(val);
2680         }
2681 #endif
2682     } else {
2683         /* RAM case */
2684         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2685                                 & TARGET_PAGE_MASK)
2686                                + addr1);
2687         switch (endian) {
2688         case DEVICE_LITTLE_ENDIAN:
2689             val = ldl_le_p(ptr);
2690             break;
2691         case DEVICE_BIG_ENDIAN:
2692             val = ldl_be_p(ptr);
2693             break;
2694         default:
2695             val = ldl_p(ptr);
2696             break;
2697         }
2698     }
2699     return val;
2700 }
2701
2702 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2703 {
2704     return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2705 }
2706
2707 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2708 {
2709     return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2710 }
2711
2712 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2713 {
2714     return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2715 }
2716
2717 /* warning: addr must be aligned */
2718 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2719                                          enum device_endian endian)
2720 {
2721     uint8_t *ptr;
2722     uint64_t val;
2723     MemoryRegion *mr;
2724     hwaddr l = 8;
2725     hwaddr addr1;
2726
2727     mr = address_space_translate(as, addr, &addr1, &l,
2728                                  false);
2729     if (l < 8 || !memory_access_is_direct(mr, false)) {
2730         /* I/O case */
2731         io_mem_read(mr, addr1, &val, 8);
2732 #if defined(TARGET_WORDS_BIGENDIAN)
2733         if (endian == DEVICE_LITTLE_ENDIAN) {
2734             val = bswap64(val);
2735         }
2736 #else
2737         if (endian == DEVICE_BIG_ENDIAN) {
2738             val = bswap64(val);
2739         }
2740 #endif
2741     } else {
2742         /* RAM case */
2743         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2744                                 & TARGET_PAGE_MASK)
2745                                + addr1);
2746         switch (endian) {
2747         case DEVICE_LITTLE_ENDIAN:
2748             val = ldq_le_p(ptr);
2749             break;
2750         case DEVICE_BIG_ENDIAN:
2751             val = ldq_be_p(ptr);
2752             break;
2753         default:
2754             val = ldq_p(ptr);
2755             break;
2756         }
2757     }
2758     return val;
2759 }
2760
2761 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2762 {
2763     return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2764 }
2765
2766 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2767 {
2768     return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2769 }
2770
2771 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2772 {
2773     return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2774 }
2775
2776 /* XXX: optimize */
2777 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2778 {
2779     uint8_t val;
2780     address_space_rw(as, addr, &val, 1, 0);
2781     return val;
2782 }
2783
2784 /* warning: addr must be aligned */
2785 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2786                                           enum device_endian endian)
2787 {
2788     uint8_t *ptr;
2789     uint64_t val;
2790     MemoryRegion *mr;
2791     hwaddr l = 2;
2792     hwaddr addr1;
2793
2794     mr = address_space_translate(as, addr, &addr1, &l,
2795                                  false);
2796     if (l < 2 || !memory_access_is_direct(mr, false)) {
2797         /* I/O case */
2798         io_mem_read(mr, addr1, &val, 2);
2799 #if defined(TARGET_WORDS_BIGENDIAN)
2800         if (endian == DEVICE_LITTLE_ENDIAN) {
2801             val = bswap16(val);
2802         }
2803 #else
2804         if (endian == DEVICE_BIG_ENDIAN) {
2805             val = bswap16(val);
2806         }
2807 #endif
2808     } else {
2809         /* RAM case */
2810         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2811                                 & TARGET_PAGE_MASK)
2812                                + addr1);
2813         switch (endian) {
2814         case DEVICE_LITTLE_ENDIAN:
2815             val = lduw_le_p(ptr);
2816             break;
2817         case DEVICE_BIG_ENDIAN:
2818             val = lduw_be_p(ptr);
2819             break;
2820         default:
2821             val = lduw_p(ptr);
2822             break;
2823         }
2824     }
2825     return val;
2826 }
2827
2828 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2829 {
2830     return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2831 }
2832
2833 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2834 {
2835     return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2836 }
2837
2838 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2839 {
2840     return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2841 }
2842
2843 /* warning: addr must be aligned. The ram page is not masked as dirty
2844    and the code inside is not invalidated. It is useful if the dirty
2845    bits are used to track modified PTEs */
2846 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2847 {
2848     uint8_t *ptr;
2849     MemoryRegion *mr;
2850     hwaddr l = 4;
2851     hwaddr addr1;
2852
2853     mr = address_space_translate(as, addr, &addr1, &l,
2854                                  true);
2855     if (l < 4 || !memory_access_is_direct(mr, true)) {
2856         io_mem_write(mr, addr1, val, 4);
2857     } else {
2858         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2859         ptr = qemu_get_ram_ptr(addr1);
2860         stl_p(ptr, val);
2861
2862         if (unlikely(in_migration)) {
2863             if (cpu_physical_memory_is_clean(addr1)) {
2864                 /* invalidate code */
2865                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2866                 /* set dirty bit */
2867                 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2868             }
2869         }
2870     }
2871 }
2872
2873 /* warning: addr must be aligned */
2874 static inline void stl_phys_internal(AddressSpace *as,
2875                                      hwaddr addr, uint32_t val,
2876                                      enum device_endian endian)
2877 {
2878     uint8_t *ptr;
2879     MemoryRegion *mr;
2880     hwaddr l = 4;
2881     hwaddr addr1;
2882
2883     mr = address_space_translate(as, addr, &addr1, &l,
2884                                  true);
2885     if (l < 4 || !memory_access_is_direct(mr, true)) {
2886 #if defined(TARGET_WORDS_BIGENDIAN)
2887         if (endian == DEVICE_LITTLE_ENDIAN) {
2888             val = bswap32(val);
2889         }
2890 #else
2891         if (endian == DEVICE_BIG_ENDIAN) {
2892             val = bswap32(val);
2893         }
2894 #endif
2895         io_mem_write(mr, addr1, val, 4);
2896     } else {
2897         /* RAM case */
2898         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2899         ptr = qemu_get_ram_ptr(addr1);
2900         switch (endian) {
2901         case DEVICE_LITTLE_ENDIAN:
2902             stl_le_p(ptr, val);
2903             break;
2904         case DEVICE_BIG_ENDIAN:
2905             stl_be_p(ptr, val);
2906             break;
2907         default:
2908             stl_p(ptr, val);
2909             break;
2910         }
2911         invalidate_and_set_dirty(addr1, 4);
2912     }
2913 }
2914
2915 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2916 {
2917     stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2918 }
2919
2920 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2921 {
2922     stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2923 }
2924
2925 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2926 {
2927     stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2928 }
2929
2930 /* XXX: optimize */
2931 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2932 {
2933     uint8_t v = val;
2934     address_space_rw(as, addr, &v, 1, 1);
2935 }
2936
2937 /* warning: addr must be aligned */
2938 static inline void stw_phys_internal(AddressSpace *as,
2939                                      hwaddr addr, uint32_t val,
2940                                      enum device_endian endian)
2941 {
2942     uint8_t *ptr;
2943     MemoryRegion *mr;
2944     hwaddr l = 2;
2945     hwaddr addr1;
2946
2947     mr = address_space_translate(as, addr, &addr1, &l, true);
2948     if (l < 2 || !memory_access_is_direct(mr, true)) {
2949 #if defined(TARGET_WORDS_BIGENDIAN)
2950         if (endian == DEVICE_LITTLE_ENDIAN) {
2951             val = bswap16(val);
2952         }
2953 #else
2954         if (endian == DEVICE_BIG_ENDIAN) {
2955             val = bswap16(val);
2956         }
2957 #endif
2958         io_mem_write(mr, addr1, val, 2);
2959     } else {
2960         /* RAM case */
2961         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2962         ptr = qemu_get_ram_ptr(addr1);
2963         switch (endian) {
2964         case DEVICE_LITTLE_ENDIAN:
2965             stw_le_p(ptr, val);
2966             break;
2967         case DEVICE_BIG_ENDIAN:
2968             stw_be_p(ptr, val);
2969             break;
2970         default:
2971             stw_p(ptr, val);
2972             break;
2973         }
2974         invalidate_and_set_dirty(addr1, 2);
2975     }
2976 }
2977
2978 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2979 {
2980     stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2981 }
2982
2983 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2984 {
2985     stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2986 }
2987
2988 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2989 {
2990     stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2991 }
2992
2993 /* XXX: optimize */
2994 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2995 {
2996     val = tswap64(val);
2997     address_space_rw(as, addr, (void *) &val, 8, 1);
2998 }
2999
3000 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3001 {
3002     val = cpu_to_le64(val);
3003     address_space_rw(as, addr, (void *) &val, 8, 1);
3004 }
3005
3006 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3007 {
3008     val = cpu_to_be64(val);
3009     address_space_rw(as, addr, (void *) &val, 8, 1);
3010 }
3011
3012 /* virtual memory access for debug (includes writing to ROM) */
3013 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3014                         uint8_t *buf, int len, int is_write)
3015 {
3016     int l;
3017     hwaddr phys_addr;
3018     target_ulong page;
3019
3020     while (len > 0) {
3021         page = addr & TARGET_PAGE_MASK;
3022         phys_addr = cpu_get_phys_page_debug(cpu, page);
3023         /* if no physical page mapped, return an error */
3024         if (phys_addr == -1)
3025             return -1;
3026         l = (page + TARGET_PAGE_SIZE) - addr;
3027         if (l > len)
3028             l = len;
3029         phys_addr += (addr & ~TARGET_PAGE_MASK);
3030         if (is_write) {
3031             cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3032         } else {
3033             address_space_rw(cpu->as, phys_addr, buf, l, 0);
3034         }
3035         len -= l;
3036         buf += l;
3037         addr += l;
3038     }
3039     return 0;
3040 }
3041 #endif
3042
3043 /*
3044  * A helper function for the _utterly broken_ virtio device model to find out if
3045  * it's running on a big endian machine. Don't do this at home kids!
3046  */
3047 bool target_words_bigendian(void);
3048 bool target_words_bigendian(void)
3049 {
3050 #if defined(TARGET_WORDS_BIGENDIAN)
3051     return true;
3052 #else
3053     return false;
3054 #endif
3055 }
3056
3057 #ifndef CONFIG_USER_ONLY
3058 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3059 {
3060     MemoryRegion*mr;
3061     hwaddr l = 1;
3062
3063     mr = address_space_translate(&address_space_memory,
3064                                  phys_addr, &phys_addr, &l, false);
3065
3066     return !(memory_region_is_ram(mr) ||
3067              memory_region_is_romd(mr));
3068 }
3069
3070 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3071 {
3072     RAMBlock *block;
3073
3074     rcu_read_lock();
3075     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3076         func(block->host, block->offset, block->used_length, opaque);
3077     }
3078     rcu_read_unlock();
3079 }
3080 #endif