virtio-blk: Rename complete_request_early to complete_request_vring
[qemu.git] / exec.c
blobc8494051a627e6abfc355429a0045c4aca9c93a7
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
78 #endif
80 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
81 /* current CPU in the current thread. It is only valid inside
82 cpu_exec() */
83 DEFINE_TLS(CPUState *, current_cpu);
84 /* 0 = Do not count executed instructions.
85 1 = Precise instruction counting.
86 2 = Adaptive rate instruction counting. */
87 int use_icount;
89 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the physical page map; packed into a single 32-bit word. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
100 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
102 /* Size of the L2 (and L3, etc) page tables. */
103 #define ADDR_SPACE_BITS 64
105 #define P_L2_BITS 9
106 #define P_L2_SIZE (1 << P_L2_BITS)
108 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
110 typedef PhysPageEntry Node[P_L2_SIZE];
112 typedef struct PhysPageMap {
113 unsigned sections_nb;
114 unsigned sections_nb_alloc;
115 unsigned nodes_nb;
116 unsigned nodes_nb_alloc;
117 Node *nodes;
118 MemoryRegionSection *sections;
119 } PhysPageMap;
121 struct AddressSpaceDispatch {
122 /* This is a multi-level map on the physical address space.
123 * The bottom level has pointers to MemoryRegionSections.
125 PhysPageEntry phys_map;
126 PhysPageMap map;
127 AddressSpace *as;
130 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
131 typedef struct subpage_t {
132 MemoryRegion iomem;
133 AddressSpace *as;
134 hwaddr base;
135 uint16_t sub_section[TARGET_PAGE_SIZE];
136 } subpage_t;
138 #define PHYS_SECTION_UNASSIGNED 0
139 #define PHYS_SECTION_NOTDIRTY 1
140 #define PHYS_SECTION_ROM 2
141 #define PHYS_SECTION_WATCH 3
143 static void io_mem_init(void);
144 static void memory_map_init(void);
145 static void tcg_commit(MemoryListener *listener);
147 static MemoryRegion io_mem_watch;
148 #endif
150 #if !defined(CONFIG_USER_ONLY)
152 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
154 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
155 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
156 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
157 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
161 static uint32_t phys_map_node_alloc(PhysPageMap *map)
163 unsigned i;
164 uint32_t ret;
166 ret = map->nodes_nb++;
167 assert(ret != PHYS_MAP_NODE_NIL);
168 assert(ret != map->nodes_nb_alloc);
169 for (i = 0; i < P_L2_SIZE; ++i) {
170 map->nodes[ret][i].skip = 1;
171 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
173 return ret;
176 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
177 hwaddr *index, hwaddr *nb, uint16_t leaf,
178 int level)
180 PhysPageEntry *p;
181 int i;
182 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
184 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
185 lp->ptr = phys_map_node_alloc(map);
186 p = map->nodes[lp->ptr];
187 if (level == 0) {
188 for (i = 0; i < P_L2_SIZE; i++) {
189 p[i].skip = 0;
190 p[i].ptr = PHYS_SECTION_UNASSIGNED;
193 } else {
194 p = map->nodes[lp->ptr];
196 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
198 while (*nb && lp < &p[P_L2_SIZE]) {
199 if ((*index & (step - 1)) == 0 && *nb >= step) {
200 lp->skip = 0;
201 lp->ptr = leaf;
202 *index += step;
203 *nb -= step;
204 } else {
205 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
207 ++lp;
211 static void phys_page_set(AddressSpaceDispatch *d,
212 hwaddr index, hwaddr nb,
213 uint16_t leaf)
215 /* Wildly overreserve - it doesn't matter much. */
216 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
218 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
221 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
222 * and update our entry so we can skip it and go directly to the destination.
224 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
226 unsigned valid_ptr = P_L2_SIZE;
227 int valid = 0;
228 PhysPageEntry *p;
229 int i;
231 if (lp->ptr == PHYS_MAP_NODE_NIL) {
232 return;
235 p = nodes[lp->ptr];
236 for (i = 0; i < P_L2_SIZE; i++) {
237 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
238 continue;
241 valid_ptr = i;
242 valid++;
243 if (p[i].skip) {
244 phys_page_compact(&p[i], nodes, compacted);
248 /* We can only compress if there's only one child. */
249 if (valid != 1) {
250 return;
253 assert(valid_ptr < P_L2_SIZE);
255 /* Don't compress if it won't fit in the # of bits we have. */
256 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
257 return;
260 lp->ptr = p[valid_ptr].ptr;
261 if (!p[valid_ptr].skip) {
262 /* If our only child is a leaf, make this a leaf. */
263 /* By design, we should have made this node a leaf to begin with so we
264 * should never reach here.
265 * But since it's so simple to handle this, let's do it just in case we
266 * change this rule.
268 lp->skip = 0;
269 } else {
270 lp->skip += p[valid_ptr].skip;
274 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
276 DECLARE_BITMAP(compacted, nodes_nb);
278 if (d->phys_map.skip) {
279 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
283 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
284 Node *nodes, MemoryRegionSection *sections)
286 PhysPageEntry *p;
287 hwaddr index = addr >> TARGET_PAGE_BITS;
288 int i;
290 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
291 if (lp.ptr == PHYS_MAP_NODE_NIL) {
292 return &sections[PHYS_SECTION_UNASSIGNED];
294 p = nodes[lp.ptr];
295 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
298 if (sections[lp.ptr].size.hi ||
299 range_covers_byte(sections[lp.ptr].offset_within_address_space,
300 sections[lp.ptr].size.lo, addr)) {
301 return &sections[lp.ptr];
302 } else {
303 return &sections[PHYS_SECTION_UNASSIGNED];
307 bool memory_region_is_unassigned(MemoryRegion *mr)
309 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
310 && mr != &io_mem_watch;
313 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
314 hwaddr addr,
315 bool resolve_subpage)
317 MemoryRegionSection *section;
318 subpage_t *subpage;
320 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
321 if (resolve_subpage && section->mr->subpage) {
322 subpage = container_of(section->mr, subpage_t, iomem);
323 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
325 return section;
328 static MemoryRegionSection *
329 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
330 hwaddr *plen, bool resolve_subpage)
332 MemoryRegionSection *section;
333 Int128 diff;
335 section = address_space_lookup_region(d, addr, resolve_subpage);
336 /* Compute offset within MemoryRegionSection */
337 addr -= section->offset_within_address_space;
339 /* Compute offset within MemoryRegion */
340 *xlat = addr + section->offset_within_region;
342 diff = int128_sub(section->mr->size, int128_make64(addr));
343 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
344 return section;
347 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
349 if (memory_region_is_ram(mr)) {
350 return !(is_write && mr->readonly);
352 if (memory_region_is_romd(mr)) {
353 return !is_write;
356 return false;
359 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
360 hwaddr *xlat, hwaddr *plen,
361 bool is_write)
363 IOMMUTLBEntry iotlb;
364 MemoryRegionSection *section;
365 MemoryRegion *mr;
366 hwaddr len = *plen;
368 for (;;) {
369 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
370 mr = section->mr;
372 if (!mr->iommu_ops) {
373 break;
376 iotlb = mr->iommu_ops->translate(mr, addr);
377 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
378 | (addr & iotlb.addr_mask));
379 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
380 if (!(iotlb.perm & (1 << is_write))) {
381 mr = &io_mem_unassigned;
382 break;
385 as = iotlb.target_as;
388 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
389 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
390 len = MIN(page, len);
393 *plen = len;
394 *xlat = addr;
395 return mr;
398 MemoryRegionSection *
399 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
400 hwaddr *plen)
402 MemoryRegionSection *section;
403 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
405 assert(!section->mr->iommu_ops);
406 return section;
408 #endif
410 void cpu_exec_init_all(void)
412 #if !defined(CONFIG_USER_ONLY)
413 qemu_mutex_init(&ram_list.mutex);
414 memory_map_init();
415 io_mem_init();
416 #endif
419 #if !defined(CONFIG_USER_ONLY)
421 static int cpu_common_post_load(void *opaque, int version_id)
423 CPUState *cpu = opaque;
425 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
426 version_id is increased. */
427 cpu->interrupt_request &= ~0x01;
428 tlb_flush(cpu, 1);
430 return 0;
433 const VMStateDescription vmstate_cpu_common = {
434 .name = "cpu_common",
435 .version_id = 1,
436 .minimum_version_id = 1,
437 .post_load = cpu_common_post_load,
438 .fields = (VMStateField[]) {
439 VMSTATE_UINT32(halted, CPUState),
440 VMSTATE_UINT32(interrupt_request, CPUState),
441 VMSTATE_END_OF_LIST()
445 #endif
447 CPUState *qemu_get_cpu(int index)
449 CPUState *cpu;
451 CPU_FOREACH(cpu) {
452 if (cpu->cpu_index == index) {
453 return cpu;
457 return NULL;
460 #if !defined(CONFIG_USER_ONLY)
461 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
463 /* We only support one address space per cpu at the moment. */
464 assert(cpu->as == as);
466 if (cpu->tcg_as_listener) {
467 memory_listener_unregister(cpu->tcg_as_listener);
468 } else {
469 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
471 cpu->tcg_as_listener->commit = tcg_commit;
472 memory_listener_register(cpu->tcg_as_listener, as);
474 #endif
476 void cpu_exec_init(CPUArchState *env)
478 CPUState *cpu = ENV_GET_CPU(env);
479 CPUClass *cc = CPU_GET_CLASS(cpu);
480 CPUState *some_cpu;
481 int cpu_index;
483 #if defined(CONFIG_USER_ONLY)
484 cpu_list_lock();
485 #endif
486 cpu_index = 0;
487 CPU_FOREACH(some_cpu) {
488 cpu_index++;
490 cpu->cpu_index = cpu_index;
491 cpu->numa_node = 0;
492 QTAILQ_INIT(&cpu->breakpoints);
493 QTAILQ_INIT(&cpu->watchpoints);
494 #ifndef CONFIG_USER_ONLY
495 cpu->as = &address_space_memory;
496 cpu->thread_id = qemu_get_thread_id();
497 #endif
498 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
499 #if defined(CONFIG_USER_ONLY)
500 cpu_list_unlock();
501 #endif
502 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
503 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
505 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
506 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
507 cpu_save, cpu_load, env);
508 assert(cc->vmsd == NULL);
509 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
510 #endif
511 if (cc->vmsd != NULL) {
512 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate the translated code covering @pc (user-mode variant). */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/* Invalidate the translated code covering @pc (system-mode variant).
 * Nothing is done when the debug lookup of @pc returns -1.
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr page = cpu_get_phys_page_debug(cpu, pc);

    if (page == -1) {
        return;
    }
    tb_invalidate_phys_addr(cpu->as, page | (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */
534 #if defined(CONFIG_USER_ONLY)
535 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
540 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
541 int flags, CPUWatchpoint **watchpoint)
543 return -ENOSYS;
545 #else
546 /* Add a watchpoint. */
547 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
548 int flags, CPUWatchpoint **watchpoint)
550 vaddr len_mask = ~(len - 1);
551 CPUWatchpoint *wp;
553 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
554 if ((len & (len - 1)) || (addr & ~len_mask) ||
555 len == 0 || len > TARGET_PAGE_SIZE) {
556 error_report("tried to set invalid watchpoint at %"
557 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
558 return -EINVAL;
560 wp = g_malloc(sizeof(*wp));
562 wp->vaddr = addr;
563 wp->len_mask = len_mask;
564 wp->flags = flags;
566 /* keep all GDB-injected watchpoints in front */
567 if (flags & BP_GDB) {
568 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
569 } else {
570 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
573 tlb_flush_page(cpu, addr);
575 if (watchpoint)
576 *watchpoint = wp;
577 return 0;
580 /* Remove a specific watchpoint. */
581 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
582 int flags)
584 vaddr len_mask = ~(len - 1);
585 CPUWatchpoint *wp;
587 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
588 if (addr == wp->vaddr && len_mask == wp->len_mask
589 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
590 cpu_watchpoint_remove_by_ref(cpu, wp);
591 return 0;
594 return -ENOENT;
597 /* Remove a specific watchpoint by reference. */
598 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
600 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
602 tlb_flush_page(cpu, watchpoint->vaddr);
604 g_free(watchpoint);
607 /* Remove all matching watchpoints. */
608 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
610 CPUWatchpoint *wp, *next;
612 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
613 if (wp->flags & mask) {
614 cpu_watchpoint_remove_by_ref(cpu, wp);
618 #endif
620 /* Add a breakpoint. */
621 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
622 CPUBreakpoint **breakpoint)
624 #if defined(TARGET_HAS_ICE)
625 CPUBreakpoint *bp;
627 bp = g_malloc(sizeof(*bp));
629 bp->pc = pc;
630 bp->flags = flags;
632 /* keep all GDB-injected breakpoints in front */
633 if (flags & BP_GDB) {
634 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
635 } else {
636 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
639 breakpoint_invalidate(cpu, pc);
641 if (breakpoint) {
642 *breakpoint = bp;
644 return 0;
645 #else
646 return -ENOSYS;
647 #endif
650 /* Remove a specific breakpoint. */
651 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
653 #if defined(TARGET_HAS_ICE)
654 CPUBreakpoint *bp;
656 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
657 if (bp->pc == pc && bp->flags == flags) {
658 cpu_breakpoint_remove_by_ref(cpu, bp);
659 return 0;
662 return -ENOENT;
663 #else
664 return -ENOSYS;
665 #endif
668 /* Remove a specific breakpoint by reference. */
669 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
671 #if defined(TARGET_HAS_ICE)
672 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
674 breakpoint_invalidate(cpu, breakpoint->pc);
676 g_free(breakpoint);
677 #endif
680 /* Remove all matching breakpoints. */
681 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
683 #if defined(TARGET_HAS_ICE)
684 CPUBreakpoint *bp, *next;
686 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
687 if (bp->flags & mask) {
688 cpu_breakpoint_remove_by_ref(cpu, bp);
691 #endif
694 /* enable or disable single step mode. EXCP_DEBUG is returned by the
695 CPU loop after each instruction */
696 void cpu_single_step(CPUState *cpu, int enabled)
698 #if defined(TARGET_HAS_ICE)
699 if (cpu->singlestep_enabled != enabled) {
700 cpu->singlestep_enabled = enabled;
701 if (kvm_enabled()) {
702 kvm_update_guest_debug(cpu, 0);
703 } else {
704 /* must flush all the translated code to avoid inconsistencies */
705 /* XXX: only flush what is necessary */
706 CPUArchState *env = cpu->env_ptr;
707 tb_flush(env);
710 #endif
713 void cpu_abort(CPUState *cpu, const char *fmt, ...)
715 va_list ap;
716 va_list ap2;
718 va_start(ap, fmt);
719 va_copy(ap2, ap);
720 fprintf(stderr, "qemu: fatal: ");
721 vfprintf(stderr, fmt, ap);
722 fprintf(stderr, "\n");
723 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
724 if (qemu_log_enabled()) {
725 qemu_log("qemu: fatal: ");
726 qemu_log_vprintf(fmt, ap2);
727 qemu_log("\n");
728 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
729 qemu_log_flush();
730 qemu_log_close();
732 va_end(ap2);
733 va_end(ap);
734 #if defined(CONFIG_USER_ONLY)
736 struct sigaction act;
737 sigfillset(&act.sa_mask);
738 act.sa_handler = SIG_DFL;
739 sigaction(SIGABRT, &act, NULL);
741 #endif
742 abort();
745 #if !defined(CONFIG_USER_ONLY)
746 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
748 RAMBlock *block;
750 /* The list is protected by the iothread lock here. */
751 block = ram_list.mru_block;
752 if (block && addr - block->offset < block->length) {
753 goto found;
755 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
756 if (addr - block->offset < block->length) {
757 goto found;
761 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
762 abort();
764 found:
765 ram_list.mru_block = block;
766 return block;
769 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
771 ram_addr_t start1;
772 RAMBlock *block;
773 ram_addr_t end;
775 end = TARGET_PAGE_ALIGN(start + length);
776 start &= TARGET_PAGE_MASK;
778 block = qemu_get_ram_block(start);
779 assert(block == qemu_get_ram_block(end - 1));
780 start1 = (uintptr_t)block->host + (start - block->offset);
781 cpu_tlb_reset_dirty_all(start1, length);
784 /* Note: start and end must be within the same ram block. */
785 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
786 unsigned client)
788 if (length == 0)
789 return;
790 cpu_physical_memory_clear_dirty_range(start, length, client);
792 if (tcg_enabled()) {
793 tlb_reset_dirty_range_all(start, length);
797 static void cpu_physical_memory_set_dirty_tracking(bool enable)
799 in_migration = enable;
802 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
803 MemoryRegionSection *section,
804 target_ulong vaddr,
805 hwaddr paddr, hwaddr xlat,
806 int prot,
807 target_ulong *address)
809 hwaddr iotlb;
810 CPUWatchpoint *wp;
812 if (memory_region_is_ram(section->mr)) {
813 /* Normal RAM. */
814 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
815 + xlat;
816 if (!section->readonly) {
817 iotlb |= PHYS_SECTION_NOTDIRTY;
818 } else {
819 iotlb |= PHYS_SECTION_ROM;
821 } else {
822 iotlb = section - section->address_space->dispatch->map.sections;
823 iotlb += xlat;
826 /* Make accesses to pages with watchpoints go via the
827 watchpoint trap routines. */
828 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
829 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
830 /* Avoid trapping reads of pages with a write breakpoint. */
831 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
832 iotlb = PHYS_SECTION_WATCH + paddr;
833 *address |= TLB_MMIO;
834 break;
839 return iotlb;
841 #endif /* !defined(CONFIG_USER_ONLY) */
843 #if !defined(CONFIG_USER_ONLY)
845 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
846 uint16_t section);
847 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
849 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
852 * Set a custom physical guest memory alloator.
853 * Accelerators with unusual needs may need this. Hopefully, we can
854 * get rid of it eventually.
856 void phys_mem_set_alloc(void *(*alloc)(size_t))
858 phys_mem_alloc = alloc;
861 static uint16_t phys_section_add(PhysPageMap *map,
862 MemoryRegionSection *section)
864 /* The physical section number is ORed with a page-aligned
865 * pointer to produce the iotlb entries. Thus it should
866 * never overflow into the page-aligned value.
868 assert(map->sections_nb < TARGET_PAGE_SIZE);
870 if (map->sections_nb == map->sections_nb_alloc) {
871 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
872 map->sections = g_renew(MemoryRegionSection, map->sections,
873 map->sections_nb_alloc);
875 map->sections[map->sections_nb] = *section;
876 memory_region_ref(section->mr);
877 return map->sections_nb++;
880 static void phys_section_destroy(MemoryRegion *mr)
882 memory_region_unref(mr);
884 if (mr->subpage) {
885 subpage_t *subpage = container_of(mr, subpage_t, iomem);
886 memory_region_destroy(&subpage->iomem);
887 g_free(subpage);
891 static void phys_sections_free(PhysPageMap *map)
893 while (map->sections_nb > 0) {
894 MemoryRegionSection *section = &map->sections[--map->sections_nb];
895 phys_section_destroy(section->mr);
897 g_free(map->sections);
898 g_free(map->nodes);
901 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
903 subpage_t *subpage;
904 hwaddr base = section->offset_within_address_space
905 & TARGET_PAGE_MASK;
906 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
907 d->map.nodes, d->map.sections);
908 MemoryRegionSection subsection = {
909 .offset_within_address_space = base,
910 .size = int128_make64(TARGET_PAGE_SIZE),
912 hwaddr start, end;
914 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
916 if (!(existing->mr->subpage)) {
917 subpage = subpage_init(d->as, base);
918 subsection.address_space = d->as;
919 subsection.mr = &subpage->iomem;
920 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
921 phys_section_add(&d->map, &subsection));
922 } else {
923 subpage = container_of(existing->mr, subpage_t, iomem);
925 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
926 end = start + int128_get64(section->size) - 1;
927 subpage_register(subpage, start, end,
928 phys_section_add(&d->map, section));
932 static void register_multipage(AddressSpaceDispatch *d,
933 MemoryRegionSection *section)
935 hwaddr start_addr = section->offset_within_address_space;
936 uint16_t section_index = phys_section_add(&d->map, section);
937 uint64_t num_pages = int128_get64(int128_rshift(section->size,
938 TARGET_PAGE_BITS));
940 assert(num_pages);
941 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
944 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
946 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
947 AddressSpaceDispatch *d = as->next_dispatch;
948 MemoryRegionSection now = *section, remain = *section;
949 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
951 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
952 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
953 - now.offset_within_address_space;
955 now.size = int128_min(int128_make64(left), now.size);
956 register_subpage(d, &now);
957 } else {
958 now.size = int128_zero();
960 while (int128_ne(remain.size, now.size)) {
961 remain.size = int128_sub(remain.size, now.size);
962 remain.offset_within_address_space += int128_get64(now.size);
963 remain.offset_within_region += int128_get64(now.size);
964 now = remain;
965 if (int128_lt(remain.size, page_size)) {
966 register_subpage(d, &now);
967 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
968 now.size = page_size;
969 register_subpage(d, &now);
970 } else {
971 now.size = int128_and(now.size, int128_neg(page_size));
972 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; only acts when KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled()) {
        kvm_flush_coalesced_mmio_buffer();
    }
}
983 void qemu_mutex_lock_ramlist(void)
985 qemu_mutex_lock(&ram_list.mutex);
988 void qemu_mutex_unlock_ramlist(void)
990 qemu_mutex_unlock(&ram_list.mutex);
993 #ifdef __linux__
995 #include <sys/vfs.h>
997 #define HUGETLBFS_MAGIC 0x958458f6
999 static long gethugepagesize(const char *path)
1001 struct statfs fs;
1002 int ret;
1004 do {
1005 ret = statfs(path, &fs);
1006 } while (ret != 0 && errno == EINTR);
1008 if (ret != 0) {
1009 perror(path);
1010 return 0;
1013 if (fs.f_type != HUGETLBFS_MAGIC)
1014 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1016 return fs.f_bsize;
1019 static void *file_ram_alloc(RAMBlock *block,
1020 ram_addr_t memory,
1021 const char *path,
1022 Error **errp)
1024 char *filename;
1025 char *sanitized_name;
1026 char *c;
1027 void *area;
1028 int fd;
1029 unsigned long hpagesize;
1031 hpagesize = gethugepagesize(path);
1032 if (!hpagesize) {
1033 goto error;
1036 if (memory < hpagesize) {
1037 return NULL;
1040 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1041 error_setg(errp,
1042 "host lacks kvm mmu notifiers, -mem-path unsupported");
1043 goto error;
1046 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1047 sanitized_name = g_strdup(block->mr->name);
1048 for (c = sanitized_name; *c != '\0'; c++) {
1049 if (*c == '/')
1050 *c = '_';
1053 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1054 sanitized_name);
1055 g_free(sanitized_name);
1057 fd = mkstemp(filename);
1058 if (fd < 0) {
1059 error_setg_errno(errp, errno,
1060 "unable to create backing store for hugepages");
1061 g_free(filename);
1062 goto error;
1064 unlink(filename);
1065 g_free(filename);
1067 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1070 * ftruncate is not supported by hugetlbfs in older
1071 * hosts, so don't bother bailing out on errors.
1072 * If anything goes wrong with it under other filesystems,
1073 * mmap will fail.
1075 if (ftruncate(fd, memory)) {
1076 perror("ftruncate");
1079 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1080 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1081 fd, 0);
1082 if (area == MAP_FAILED) {
1083 error_setg_errno(errp, errno,
1084 "unable to map backing store for hugepages");
1085 close(fd);
1086 goto error;
1089 if (mem_prealloc) {
1090 os_mem_prealloc(fd, area, memory);
1093 block->fd = fd;
1094 return area;
1096 error:
1097 if (mem_prealloc) {
1098 exit(1);
1100 return NULL;
1102 #endif
1104 static ram_addr_t find_ram_offset(ram_addr_t size)
1106 RAMBlock *block, *next_block;
1107 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1109 assert(size != 0); /* it would hand out same offset multiple times */
1111 if (QTAILQ_EMPTY(&ram_list.blocks))
1112 return 0;
1114 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1115 ram_addr_t end, next = RAM_ADDR_MAX;
1117 end = block->offset + block->length;
1119 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1120 if (next_block->offset >= end) {
1121 next = MIN(next, next_block->offset);
1124 if (next - end >= size && next - end < mingap) {
1125 offset = end;
1126 mingap = next - end;
1130 if (offset == RAM_ADDR_MAX) {
1131 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1132 (uint64_t)size);
1133 abort();
1136 return offset;
1139 ram_addr_t last_ram_offset(void)
1141 RAMBlock *block;
1142 ram_addr_t last = 0;
1144 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1145 last = MAX(last, block->offset + block->length);
1147 return last;
1150 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1152 int ret;
1154 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1155 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1156 "dump-guest-core", true)) {
1157 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1158 if (ret) {
1159 perror("qemu_madvise");
1160 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1161 "but dump_guest_core=off specified\n");
1166 static RAMBlock *find_ram_block(ram_addr_t addr)
1168 RAMBlock *block;
1170 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1171 if (block->offset == addr) {
1172 return block;
1176 return NULL;
1179 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1181 RAMBlock *new_block = find_ram_block(addr);
1182 RAMBlock *block;
1184 assert(new_block);
1185 assert(!new_block->idstr[0]);
1187 if (dev) {
1188 char *id = qdev_get_dev_path(dev);
1189 if (id) {
1190 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1191 g_free(id);
1194 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1196 /* This assumes the iothread lock is taken here too. */
1197 qemu_mutex_lock_ramlist();
1198 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1199 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1200 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1201 new_block->idstr);
1202 abort();
1205 qemu_mutex_unlock_ramlist();
1208 void qemu_ram_unset_idstr(ram_addr_t addr)
1210 RAMBlock *block = find_ram_block(addr);
1212 if (block) {
1213 memset(block->idstr, 0, sizeof(block->idstr));
1217 static int memory_try_enable_merging(void *addr, size_t len)
1219 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1220 /* disabled by the user */
1221 return 0;
1224 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1227 static ram_addr_t ram_block_add(RAMBlock *new_block)
1229 RAMBlock *block;
1230 ram_addr_t old_ram_size, new_ram_size;
1232 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1234 /* This assumes the iothread lock is taken here too. */
1235 qemu_mutex_lock_ramlist();
1236 new_block->offset = find_ram_offset(new_block->length);
1238 if (!new_block->host) {
1239 if (xen_enabled()) {
1240 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1241 } else {
1242 new_block->host = phys_mem_alloc(new_block->length);
1243 if (!new_block->host) {
1244 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1245 new_block->mr->name, strerror(errno));
1246 exit(1);
1248 memory_try_enable_merging(new_block->host, new_block->length);
1252 /* Keep the list sorted from biggest to smallest block. */
1253 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1254 if (block->length < new_block->length) {
1255 break;
1258 if (block) {
1259 QTAILQ_INSERT_BEFORE(block, new_block, next);
1260 } else {
1261 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1263 ram_list.mru_block = NULL;
1265 ram_list.version++;
1266 qemu_mutex_unlock_ramlist();
1268 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1270 if (new_ram_size > old_ram_size) {
1271 int i;
1272 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1273 ram_list.dirty_memory[i] =
1274 bitmap_zero_extend(ram_list.dirty_memory[i],
1275 old_ram_size, new_ram_size);
1278 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1280 qemu_ram_setup_dump(new_block->host, new_block->length);
1281 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1282 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1284 if (kvm_enabled()) {
1285 kvm_setup_guest_memory(new_block->host, new_block->length);
1288 return new_block->offset;
1291 #ifdef __linux__
1292 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1293 bool share, const char *mem_path,
1294 Error **errp)
1296 RAMBlock *new_block;
1298 if (xen_enabled()) {
1299 error_setg(errp, "-mem-path not supported with Xen");
1300 return -1;
1303 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1305 * file_ram_alloc() needs to allocate just like
1306 * phys_mem_alloc, but we haven't bothered to provide
1307 * a hook there.
1309 error_setg(errp,
1310 "-mem-path not supported with this accelerator");
1311 return -1;
1314 size = TARGET_PAGE_ALIGN(size);
1315 new_block = g_malloc0(sizeof(*new_block));
1316 new_block->mr = mr;
1317 new_block->length = size;
1318 new_block->flags = share ? RAM_SHARED : 0;
1319 new_block->host = file_ram_alloc(new_block, size,
1320 mem_path, errp);
1321 if (!new_block->host) {
1322 g_free(new_block);
1323 return -1;
1326 return ram_block_add(new_block);
1328 #endif
1330 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1331 MemoryRegion *mr)
1333 RAMBlock *new_block;
1335 size = TARGET_PAGE_ALIGN(size);
1336 new_block = g_malloc0(sizeof(*new_block));
1337 new_block->mr = mr;
1338 new_block->length = size;
1339 new_block->fd = -1;
1340 new_block->host = host;
1341 if (host) {
1342 new_block->flags |= RAM_PREALLOC;
1344 return ram_block_add(new_block);
1347 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1349 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1352 void qemu_ram_free_from_ptr(ram_addr_t addr)
1354 RAMBlock *block;
1356 /* This assumes the iothread lock is taken here too. */
1357 qemu_mutex_lock_ramlist();
1358 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1359 if (addr == block->offset) {
1360 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1361 ram_list.mru_block = NULL;
1362 ram_list.version++;
1363 g_free(block);
1364 break;
1367 qemu_mutex_unlock_ramlist();
1370 void qemu_ram_free(ram_addr_t addr)
1372 RAMBlock *block;
1374 /* This assumes the iothread lock is taken here too. */
1375 qemu_mutex_lock_ramlist();
1376 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1377 if (addr == block->offset) {
1378 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1379 ram_list.mru_block = NULL;
1380 ram_list.version++;
1381 if (block->flags & RAM_PREALLOC) {
1383 } else if (xen_enabled()) {
1384 xen_invalidate_map_cache_entry(block->host);
1385 #ifndef _WIN32
1386 } else if (block->fd >= 0) {
1387 munmap(block->host, block->length);
1388 close(block->fd);
1389 #endif
1390 } else {
1391 qemu_anon_ram_free(block->host, block->length);
1393 g_free(block);
1394 break;
1397 qemu_mutex_unlock_ramlist();
1401 #ifndef _WIN32
1402 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1404 RAMBlock *block;
1405 ram_addr_t offset;
1406 int flags;
1407 void *area, *vaddr;
1409 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1410 offset = addr - block->offset;
1411 if (offset < block->length) {
1412 vaddr = block->host + offset;
1413 if (block->flags & RAM_PREALLOC) {
1415 } else if (xen_enabled()) {
1416 abort();
1417 } else {
1418 flags = MAP_FIXED;
1419 munmap(vaddr, length);
1420 if (block->fd >= 0) {
1421 flags |= (block->flags & RAM_SHARED ?
1422 MAP_SHARED : MAP_PRIVATE);
1423 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1424 flags, block->fd, offset);
1425 } else {
1427 * Remap needs to match alloc. Accelerators that
1428 * set phys_mem_alloc never remap. If they did,
1429 * we'd need a remap hook here.
1431 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1433 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1434 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1435 flags, -1, 0);
1437 if (area != vaddr) {
1438 fprintf(stderr, "Could not remap addr: "
1439 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1440 length, addr);
1441 exit(1);
1443 memory_try_enable_merging(vaddr, length);
1444 qemu_ram_setup_dump(vaddr, length);
1446 return;
1450 #endif /* !_WIN32 */
1452 int qemu_get_ram_fd(ram_addr_t addr)
1454 RAMBlock *block = qemu_get_ram_block(addr);
1456 return block->fd;
1459 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1460 With the exception of the softmmu code in this file, this should
1461 only be used for local memory (e.g. video ram) that the device owns,
1462 and knows it isn't going to access beyond the end of the block.
1464 It should not be used for general purpose DMA.
1465 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1467 void *qemu_get_ram_ptr(ram_addr_t addr)
1469 RAMBlock *block = qemu_get_ram_block(addr);
1471 if (xen_enabled()) {
1472 /* We need to check if the requested address is in the RAM
1473 * because we don't want to map the entire memory in QEMU.
1474 * In that case just map until the end of the page.
1476 if (block->offset == 0) {
1477 return xen_map_cache(addr, 0, 0);
1478 } else if (block->host == NULL) {
1479 block->host =
1480 xen_map_cache(block->offset, block->length, 1);
1483 return block->host + (addr - block->offset);
1486 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1487 * but takes a size argument */
1488 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1490 if (*size == 0) {
1491 return NULL;
1493 if (xen_enabled()) {
1494 return xen_map_cache(addr, *size, 1);
1495 } else {
1496 RAMBlock *block;
1498 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1499 if (addr - block->offset < block->length) {
1500 if (addr - block->offset + *size > block->length)
1501 *size = block->length - addr + block->offset;
1502 return block->host + (addr - block->offset);
1506 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1507 abort();
1511 /* Some of the softmmu routines need to translate from a host pointer
1512 (typically a TLB entry) back to a ram offset. */
1513 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1515 RAMBlock *block;
1516 uint8_t *host = ptr;
1518 if (xen_enabled()) {
1519 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1520 return qemu_get_ram_block(*ram_addr)->mr;
1523 block = ram_list.mru_block;
1524 if (block && block->host && host - block->host < block->length) {
1525 goto found;
1528 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1529 /* This case append when the block is not mapped. */
1530 if (block->host == NULL) {
1531 continue;
1533 if (host - block->host < block->length) {
1534 goto found;
1538 return NULL;
1540 found:
1541 *ram_addr = block->offset + (host - block->host);
1542 return block->mr;
1545 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1546 uint64_t val, unsigned size)
1548 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1549 tb_invalidate_phys_page_fast(ram_addr, size);
1551 switch (size) {
1552 case 1:
1553 stb_p(qemu_get_ram_ptr(ram_addr), val);
1554 break;
1555 case 2:
1556 stw_p(qemu_get_ram_ptr(ram_addr), val);
1557 break;
1558 case 4:
1559 stl_p(qemu_get_ram_ptr(ram_addr), val);
1560 break;
1561 default:
1562 abort();
1564 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1565 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1566 /* we remove the notdirty callback only if the code has been
1567 flushed */
1568 if (!cpu_physical_memory_is_clean(ram_addr)) {
1569 CPUArchState *env = current_cpu->env_ptr;
1570 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1574 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1575 unsigned size, bool is_write)
1577 return is_write;
1580 static const MemoryRegionOps notdirty_mem_ops = {
1581 .write = notdirty_mem_write,
1582 .valid.accepts = notdirty_mem_accepts,
1583 .endianness = DEVICE_NATIVE_ENDIAN,
1586 /* Generate a debug exception if a watchpoint has been hit. */
1587 static void check_watchpoint(int offset, int len_mask, int flags)
1589 CPUState *cpu = current_cpu;
1590 CPUArchState *env = cpu->env_ptr;
1591 target_ulong pc, cs_base;
1592 target_ulong vaddr;
1593 CPUWatchpoint *wp;
1594 int cpu_flags;
1596 if (cpu->watchpoint_hit) {
1597 /* We re-entered the check after replacing the TB. Now raise
1598 * the debug interrupt so that is will trigger after the
1599 * current instruction. */
1600 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1601 return;
1603 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1604 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1605 if ((vaddr == (wp->vaddr & len_mask) ||
1606 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1607 wp->flags |= BP_WATCHPOINT_HIT;
1608 if (!cpu->watchpoint_hit) {
1609 cpu->watchpoint_hit = wp;
1610 tb_check_watchpoint(cpu);
1611 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1612 cpu->exception_index = EXCP_DEBUG;
1613 cpu_loop_exit(cpu);
1614 } else {
1615 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1616 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1617 cpu_resume_from_signal(cpu, NULL);
1620 } else {
1621 wp->flags &= ~BP_WATCHPOINT_HIT;
1626 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1627 so these check for a hit then pass through to the normal out-of-line
1628 phys routines. */
1629 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1630 unsigned size)
1632 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1633 switch (size) {
1634 case 1: return ldub_phys(&address_space_memory, addr);
1635 case 2: return lduw_phys(&address_space_memory, addr);
1636 case 4: return ldl_phys(&address_space_memory, addr);
1637 default: abort();
1641 static void watch_mem_write(void *opaque, hwaddr addr,
1642 uint64_t val, unsigned size)
1644 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1645 switch (size) {
1646 case 1:
1647 stb_phys(&address_space_memory, addr, val);
1648 break;
1649 case 2:
1650 stw_phys(&address_space_memory, addr, val);
1651 break;
1652 case 4:
1653 stl_phys(&address_space_memory, addr, val);
1654 break;
1655 default: abort();
1659 static const MemoryRegionOps watch_mem_ops = {
1660 .read = watch_mem_read,
1661 .write = watch_mem_write,
1662 .endianness = DEVICE_NATIVE_ENDIAN,
1665 static uint64_t subpage_read(void *opaque, hwaddr addr,
1666 unsigned len)
1668 subpage_t *subpage = opaque;
1669 uint8_t buf[4];
1671 #if defined(DEBUG_SUBPAGE)
1672 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1673 subpage, len, addr);
1674 #endif
1675 address_space_read(subpage->as, addr + subpage->base, buf, len);
1676 switch (len) {
1677 case 1:
1678 return ldub_p(buf);
1679 case 2:
1680 return lduw_p(buf);
1681 case 4:
1682 return ldl_p(buf);
1683 default:
1684 abort();
1688 static void subpage_write(void *opaque, hwaddr addr,
1689 uint64_t value, unsigned len)
1691 subpage_t *subpage = opaque;
1692 uint8_t buf[4];
1694 #if defined(DEBUG_SUBPAGE)
1695 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1696 " value %"PRIx64"\n",
1697 __func__, subpage, len, addr, value);
1698 #endif
1699 switch (len) {
1700 case 1:
1701 stb_p(buf, value);
1702 break;
1703 case 2:
1704 stw_p(buf, value);
1705 break;
1706 case 4:
1707 stl_p(buf, value);
1708 break;
1709 default:
1710 abort();
1712 address_space_write(subpage->as, addr + subpage->base, buf, len);
1715 static bool subpage_accepts(void *opaque, hwaddr addr,
1716 unsigned len, bool is_write)
1718 subpage_t *subpage = opaque;
1719 #if defined(DEBUG_SUBPAGE)
1720 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1721 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1722 #endif
1724 return address_space_access_valid(subpage->as, addr + subpage->base,
1725 len, is_write);
1728 static const MemoryRegionOps subpage_ops = {
1729 .read = subpage_read,
1730 .write = subpage_write,
1731 .valid.accepts = subpage_accepts,
1732 .endianness = DEVICE_NATIVE_ENDIAN,
1735 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1736 uint16_t section)
1738 int idx, eidx;
1740 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1741 return -1;
1742 idx = SUBPAGE_IDX(start);
1743 eidx = SUBPAGE_IDX(end);
1744 #if defined(DEBUG_SUBPAGE)
1745 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1746 __func__, mmio, start, end, idx, eidx, section);
1747 #endif
1748 for (; idx <= eidx; idx++) {
1749 mmio->sub_section[idx] = section;
1752 return 0;
1755 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1757 subpage_t *mmio;
1759 mmio = g_malloc0(sizeof(subpage_t));
1761 mmio->as = as;
1762 mmio->base = base;
1763 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1764 "subpage", TARGET_PAGE_SIZE);
1765 mmio->iomem.subpage = true;
1766 #if defined(DEBUG_SUBPAGE)
1767 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1768 mmio, base, TARGET_PAGE_SIZE);
1769 #endif
1770 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1772 return mmio;
1775 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1776 MemoryRegion *mr)
1778 assert(as);
1779 MemoryRegionSection section = {
1780 .address_space = as,
1781 .mr = mr,
1782 .offset_within_address_space = 0,
1783 .offset_within_region = 0,
1784 .size = int128_2_64(),
1787 return phys_section_add(map, &section);
1790 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1792 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1795 static void io_mem_init(void)
1797 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1798 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1799 "unassigned", UINT64_MAX);
1800 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1801 "notdirty", UINT64_MAX);
1802 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1803 "watch", UINT64_MAX);
1806 static void mem_begin(MemoryListener *listener)
1808 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1809 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1810 uint16_t n;
1812 n = dummy_section(&d->map, as, &io_mem_unassigned);
1813 assert(n == PHYS_SECTION_UNASSIGNED);
1814 n = dummy_section(&d->map, as, &io_mem_notdirty);
1815 assert(n == PHYS_SECTION_NOTDIRTY);
1816 n = dummy_section(&d->map, as, &io_mem_rom);
1817 assert(n == PHYS_SECTION_ROM);
1818 n = dummy_section(&d->map, as, &io_mem_watch);
1819 assert(n == PHYS_SECTION_WATCH);
1821 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1822 d->as = as;
1823 as->next_dispatch = d;
1826 static void mem_commit(MemoryListener *listener)
1828 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1829 AddressSpaceDispatch *cur = as->dispatch;
1830 AddressSpaceDispatch *next = as->next_dispatch;
1832 phys_page_compact_all(next, next->map.nodes_nb);
1834 as->dispatch = next;
1836 if (cur) {
1837 phys_sections_free(&cur->map);
1838 g_free(cur);
1842 static void tcg_commit(MemoryListener *listener)
1844 CPUState *cpu;
1846 /* since each CPU stores ram addresses in its TLB cache, we must
1847 reset the modified entries */
1848 /* XXX: slow ! */
1849 CPU_FOREACH(cpu) {
1850 /* FIXME: Disentangle the cpu.h circular files deps so we can
1851 directly get the right CPU from listener. */
1852 if (cpu->tcg_as_listener != listener) {
1853 continue;
1855 tlb_flush(cpu, 1);
1859 static void core_log_global_start(MemoryListener *listener)
1861 cpu_physical_memory_set_dirty_tracking(true);
1864 static void core_log_global_stop(MemoryListener *listener)
1866 cpu_physical_memory_set_dirty_tracking(false);
1869 static MemoryListener core_memory_listener = {
1870 .log_global_start = core_log_global_start,
1871 .log_global_stop = core_log_global_stop,
1872 .priority = 1,
1875 void address_space_init_dispatch(AddressSpace *as)
1877 as->dispatch = NULL;
1878 as->dispatch_listener = (MemoryListener) {
1879 .begin = mem_begin,
1880 .commit = mem_commit,
1881 .region_add = mem_add,
1882 .region_nop = mem_add,
1883 .priority = 0,
1885 memory_listener_register(&as->dispatch_listener, as);
1888 void address_space_destroy_dispatch(AddressSpace *as)
1890 AddressSpaceDispatch *d = as->dispatch;
1892 memory_listener_unregister(&as->dispatch_listener);
1893 g_free(d);
1894 as->dispatch = NULL;
1897 static void memory_map_init(void)
1899 system_memory = g_malloc(sizeof(*system_memory));
1901 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1902 address_space_init(&address_space_memory, system_memory, "memory");
1904 system_io = g_malloc(sizeof(*system_io));
1905 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1906 65536);
1907 address_space_init(&address_space_io, system_io, "I/O");
1909 memory_listener_register(&core_memory_listener, &address_space_memory);
1912 MemoryRegion *get_system_memory(void)
1914 return system_memory;
1917 MemoryRegion *get_system_io(void)
1919 return system_io;
1922 #endif /* !defined(CONFIG_USER_ONLY) */
1924 /* physical memory access (slow version, mainly for debug) */
1925 #if defined(CONFIG_USER_ONLY)
1926 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1927 uint8_t *buf, int len, int is_write)
1929 int l, flags;
1930 target_ulong page;
1931 void * p;
1933 while (len > 0) {
1934 page = addr & TARGET_PAGE_MASK;
1935 l = (page + TARGET_PAGE_SIZE) - addr;
1936 if (l > len)
1937 l = len;
1938 flags = page_get_flags(page);
1939 if (!(flags & PAGE_VALID))
1940 return -1;
1941 if (is_write) {
1942 if (!(flags & PAGE_WRITE))
1943 return -1;
1944 /* XXX: this code should not depend on lock_user */
1945 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1946 return -1;
1947 memcpy(p, buf, l);
1948 unlock_user(p, addr, l);
1949 } else {
1950 if (!(flags & PAGE_READ))
1951 return -1;
1952 /* XXX: this code should not depend on lock_user */
1953 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1954 return -1;
1955 memcpy(buf, p, l);
1956 unlock_user(p, addr, 0);
1958 len -= l;
1959 buf += l;
1960 addr += l;
1962 return 0;
1965 #else
1967 static void invalidate_and_set_dirty(hwaddr addr,
1968 hwaddr length)
1970 if (cpu_physical_memory_is_clean(addr)) {
1971 /* invalidate code */
1972 tb_invalidate_phys_page_range(addr, addr + length, 0);
1973 /* set dirty bit */
1974 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1975 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1977 xen_modified_memory(addr, length);
1980 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1982 unsigned access_size_max = mr->ops->valid.max_access_size;
1984 /* Regions are assumed to support 1-4 byte accesses unless
1985 otherwise specified. */
1986 if (access_size_max == 0) {
1987 access_size_max = 4;
1990 /* Bound the maximum access by the alignment of the address. */
1991 if (!mr->ops->impl.unaligned) {
1992 unsigned align_size_max = addr & -addr;
1993 if (align_size_max != 0 && align_size_max < access_size_max) {
1994 access_size_max = align_size_max;
1998 /* Don't attempt accesses larger than the maximum. */
1999 if (l > access_size_max) {
2000 l = access_size_max;
2002 if (l & (l - 1)) {
2003 l = 1 << (qemu_fls(l) - 1);
2006 return l;
2009 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2010 int len, bool is_write)
2012 hwaddr l;
2013 uint8_t *ptr;
2014 uint64_t val;
2015 hwaddr addr1;
2016 MemoryRegion *mr;
2017 bool error = false;
2019 while (len > 0) {
2020 l = len;
2021 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2023 if (is_write) {
2024 if (!memory_access_is_direct(mr, is_write)) {
2025 l = memory_access_size(mr, l, addr1);
2026 /* XXX: could force current_cpu to NULL to avoid
2027 potential bugs */
2028 switch (l) {
2029 case 8:
2030 /* 64 bit write access */
2031 val = ldq_p(buf);
2032 error |= io_mem_write(mr, addr1, val, 8);
2033 break;
2034 case 4:
2035 /* 32 bit write access */
2036 val = ldl_p(buf);
2037 error |= io_mem_write(mr, addr1, val, 4);
2038 break;
2039 case 2:
2040 /* 16 bit write access */
2041 val = lduw_p(buf);
2042 error |= io_mem_write(mr, addr1, val, 2);
2043 break;
2044 case 1:
2045 /* 8 bit write access */
2046 val = ldub_p(buf);
2047 error |= io_mem_write(mr, addr1, val, 1);
2048 break;
2049 default:
2050 abort();
2052 } else {
2053 addr1 += memory_region_get_ram_addr(mr);
2054 /* RAM case */
2055 ptr = qemu_get_ram_ptr(addr1);
2056 memcpy(ptr, buf, l);
2057 invalidate_and_set_dirty(addr1, l);
2059 } else {
2060 if (!memory_access_is_direct(mr, is_write)) {
2061 /* I/O case */
2062 l = memory_access_size(mr, l, addr1);
2063 switch (l) {
2064 case 8:
2065 /* 64 bit read access */
2066 error |= io_mem_read(mr, addr1, &val, 8);
2067 stq_p(buf, val);
2068 break;
2069 case 4:
2070 /* 32 bit read access */
2071 error |= io_mem_read(mr, addr1, &val, 4);
2072 stl_p(buf, val);
2073 break;
2074 case 2:
2075 /* 16 bit read access */
2076 error |= io_mem_read(mr, addr1, &val, 2);
2077 stw_p(buf, val);
2078 break;
2079 case 1:
2080 /* 8 bit read access */
2081 error |= io_mem_read(mr, addr1, &val, 1);
2082 stb_p(buf, val);
2083 break;
2084 default:
2085 abort();
2087 } else {
2088 /* RAM case */
2089 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2090 memcpy(buf, ptr, l);
2093 len -= l;
2094 buf += l;
2095 addr += l;
2098 return error;
2101 bool address_space_write(AddressSpace *as, hwaddr addr,
2102 const uint8_t *buf, int len)
2104 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2107 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2109 return address_space_rw(as, addr, buf, len, false);
2113 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2114 int len, int is_write)
2116 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE = 1,    /* flush the host icache for the range */
};
2124 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2125 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2127 hwaddr l;
2128 uint8_t *ptr;
2129 hwaddr addr1;
2130 MemoryRegion *mr;
2132 while (len > 0) {
2133 l = len;
2134 mr = address_space_translate(as, addr, &addr1, &l, true);
2136 if (!(memory_region_is_ram(mr) ||
2137 memory_region_is_romd(mr))) {
2138 /* do nothing */
2139 } else {
2140 addr1 += memory_region_get_ram_addr(mr);
2141 /* ROM/RAM case */
2142 ptr = qemu_get_ram_ptr(addr1);
2143 switch (type) {
2144 case WRITE_DATA:
2145 memcpy(ptr, buf, l);
2146 invalidate_and_set_dirty(addr1, l);
2147 break;
2148 case FLUSH_CACHE:
2149 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2150 break;
2153 len -= l;
2154 buf += l;
2155 addr += l;
2159 /* used for ROM loading : can write in RAM and ROM */
2160 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2161 const uint8_t *buf, int len)
2163 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2166 void cpu_flush_icache_range(hwaddr start, int len)
2169 * This function should do the same thing as an icache flush that was
2170 * triggered from within the guest. For TCG we are always cache coherent,
2171 * so there is no need to flush anything. For KVM / Xen we need to flush
2172 * the host's instruction cache at least.
2174 if (tcg_enabled()) {
2175 return;
2178 cpu_physical_memory_write_rom_internal(&address_space_memory,
2179 start, NULL, len, FLUSH_CACHE);
2182 typedef struct {
2183 MemoryRegion *mr;
2184 void *buffer;
2185 hwaddr addr;
2186 hwaddr len;
2187 } BounceBuffer;
2189 static BounceBuffer bounce;
2191 typedef struct MapClient {
2192 void *opaque;
2193 void (*callback)(void *opaque);
2194 QLIST_ENTRY(MapClient) link;
2195 } MapClient;
2197 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2198 = QLIST_HEAD_INITIALIZER(map_client_list);
2200 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2202 MapClient *client = g_malloc(sizeof(*client));
2204 client->opaque = opaque;
2205 client->callback = callback;
2206 QLIST_INSERT_HEAD(&map_client_list, client, link);
2207 return client;
2210 static void cpu_unregister_map_client(void *_client)
2212 MapClient *client = (MapClient *)_client;
2214 QLIST_REMOVE(client, link);
2215 g_free(client);
2218 static void cpu_notify_map_clients(void)
2220 MapClient *client;
2222 while (!QLIST_EMPTY(&map_client_list)) {
2223 client = QLIST_FIRST(&map_client_list);
2224 client->callback(client->opaque);
2225 cpu_unregister_map_client(client);
2229 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2231 MemoryRegion *mr;
2232 hwaddr l, xlat;
2234 while (len > 0) {
2235 l = len;
2236 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2237 if (!memory_access_is_direct(mr, is_write)) {
2238 l = memory_access_size(mr, l, addr);
2239 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2240 return false;
2244 len -= l;
2245 addr += l;
2247 return true;
2250 /* Map a physical memory region into a host virtual address.
2251 * May map a subset of the requested range, given by and returned in *plen.
2252 * May return NULL if resources needed to perform the mapping are exhausted.
2253 * Use only for reads OR writes - not for read-modify-write operations.
2254 * Use cpu_register_map_client() to know when retrying the map operation is
2255 * likely to succeed.
2257 void *address_space_map(AddressSpace *as,
2258 hwaddr addr,
2259 hwaddr *plen,
2260 bool is_write)
2262 hwaddr len = *plen;
2263 hwaddr done = 0;
2264 hwaddr l, xlat, base;
2265 MemoryRegion *mr, *this_mr;
2266 ram_addr_t raddr;
2268 if (len == 0) {
2269 return NULL;
2272 l = len;
2273 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2274 if (!memory_access_is_direct(mr, is_write)) {
2275 if (bounce.buffer) {
2276 return NULL;
2278 /* Avoid unbounded allocations */
2279 l = MIN(l, TARGET_PAGE_SIZE);
2280 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2281 bounce.addr = addr;
2282 bounce.len = l;
2284 memory_region_ref(mr);
2285 bounce.mr = mr;
2286 if (!is_write) {
2287 address_space_read(as, addr, bounce.buffer, l);
2290 *plen = l;
2291 return bounce.buffer;
2294 base = xlat;
2295 raddr = memory_region_get_ram_addr(mr);
2297 for (;;) {
2298 len -= l;
2299 addr += l;
2300 done += l;
2301 if (len == 0) {
2302 break;
2305 l = len;
2306 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2307 if (this_mr != mr || xlat != base + done) {
2308 break;
2312 memory_region_ref(mr);
2313 *plen = done;
2314 return qemu_ram_ptr_length(raddr + base, plen);
2317 /* Unmaps a memory region previously mapped by address_space_map().
2318 * Will also mark the memory as dirty if is_write == 1. access_len gives
2319 * the amount of memory that was actually read or written by the caller.
2321 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2322 int is_write, hwaddr access_len)
2324 if (buffer != bounce.buffer) {
2325 MemoryRegion *mr;
2326 ram_addr_t addr1;
2328 mr = qemu_ram_addr_from_host(buffer, &addr1);
2329 assert(mr != NULL);
2330 if (is_write) {
2331 while (access_len) {
2332 unsigned l;
2333 l = TARGET_PAGE_SIZE;
2334 if (l > access_len)
2335 l = access_len;
2336 invalidate_and_set_dirty(addr1, l);
2337 addr1 += l;
2338 access_len -= l;
2341 if (xen_enabled()) {
2342 xen_invalidate_map_cache_entry(buffer);
2344 memory_region_unref(mr);
2345 return;
2347 if (is_write) {
2348 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2350 qemu_vfree(bounce.buffer);
2351 bounce.buffer = NULL;
2352 memory_region_unref(bounce.mr);
2353 cpu_notify_map_clients();
2356 void *cpu_physical_memory_map(hwaddr addr,
2357 hwaddr *plen,
2358 int is_write)
2360 return address_space_map(&address_space_memory, addr, plen, is_write);
2363 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2364 int is_write, hwaddr access_len)
2366 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2369 /* warning: addr must be aligned */
2370 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2371 enum device_endian endian)
2373 uint8_t *ptr;
2374 uint64_t val;
2375 MemoryRegion *mr;
2376 hwaddr l = 4;
2377 hwaddr addr1;
2379 mr = address_space_translate(as, addr, &addr1, &l, false);
2380 if (l < 4 || !memory_access_is_direct(mr, false)) {
2381 /* I/O case */
2382 io_mem_read(mr, addr1, &val, 4);
2383 #if defined(TARGET_WORDS_BIGENDIAN)
2384 if (endian == DEVICE_LITTLE_ENDIAN) {
2385 val = bswap32(val);
2387 #else
2388 if (endian == DEVICE_BIG_ENDIAN) {
2389 val = bswap32(val);
2391 #endif
2392 } else {
2393 /* RAM case */
2394 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2395 & TARGET_PAGE_MASK)
2396 + addr1);
2397 switch (endian) {
2398 case DEVICE_LITTLE_ENDIAN:
2399 val = ldl_le_p(ptr);
2400 break;
2401 case DEVICE_BIG_ENDIAN:
2402 val = ldl_be_p(ptr);
2403 break;
2404 default:
2405 val = ldl_p(ptr);
2406 break;
2409 return val;
2412 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2414 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2417 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2419 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2422 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2424 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2427 /* warning: addr must be aligned */
2428 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2429 enum device_endian endian)
2431 uint8_t *ptr;
2432 uint64_t val;
2433 MemoryRegion *mr;
2434 hwaddr l = 8;
2435 hwaddr addr1;
2437 mr = address_space_translate(as, addr, &addr1, &l,
2438 false);
2439 if (l < 8 || !memory_access_is_direct(mr, false)) {
2440 /* I/O case */
2441 io_mem_read(mr, addr1, &val, 8);
2442 #if defined(TARGET_WORDS_BIGENDIAN)
2443 if (endian == DEVICE_LITTLE_ENDIAN) {
2444 val = bswap64(val);
2446 #else
2447 if (endian == DEVICE_BIG_ENDIAN) {
2448 val = bswap64(val);
2450 #endif
2451 } else {
2452 /* RAM case */
2453 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2454 & TARGET_PAGE_MASK)
2455 + addr1);
2456 switch (endian) {
2457 case DEVICE_LITTLE_ENDIAN:
2458 val = ldq_le_p(ptr);
2459 break;
2460 case DEVICE_BIG_ENDIAN:
2461 val = ldq_be_p(ptr);
2462 break;
2463 default:
2464 val = ldq_p(ptr);
2465 break;
2468 return val;
2471 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2473 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2476 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2478 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2481 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2483 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2486 /* XXX: optimize */
2487 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2489 uint8_t val;
2490 address_space_rw(as, addr, &val, 1, 0);
2491 return val;
2494 /* warning: addr must be aligned */
2495 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2496 enum device_endian endian)
2498 uint8_t *ptr;
2499 uint64_t val;
2500 MemoryRegion *mr;
2501 hwaddr l = 2;
2502 hwaddr addr1;
2504 mr = address_space_translate(as, addr, &addr1, &l,
2505 false);
2506 if (l < 2 || !memory_access_is_direct(mr, false)) {
2507 /* I/O case */
2508 io_mem_read(mr, addr1, &val, 2);
2509 #if defined(TARGET_WORDS_BIGENDIAN)
2510 if (endian == DEVICE_LITTLE_ENDIAN) {
2511 val = bswap16(val);
2513 #else
2514 if (endian == DEVICE_BIG_ENDIAN) {
2515 val = bswap16(val);
2517 #endif
2518 } else {
2519 /* RAM case */
2520 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2521 & TARGET_PAGE_MASK)
2522 + addr1);
2523 switch (endian) {
2524 case DEVICE_LITTLE_ENDIAN:
2525 val = lduw_le_p(ptr);
2526 break;
2527 case DEVICE_BIG_ENDIAN:
2528 val = lduw_be_p(ptr);
2529 break;
2530 default:
2531 val = lduw_p(ptr);
2532 break;
2535 return val;
2538 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2540 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2543 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2545 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2548 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2550 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2553 /* warning: addr must be aligned. The ram page is not masked as dirty
2554 and the code inside is not invalidated. It is useful if the dirty
2555 bits are used to track modified PTEs */
2556 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2558 uint8_t *ptr;
2559 MemoryRegion *mr;
2560 hwaddr l = 4;
2561 hwaddr addr1;
2563 mr = address_space_translate(as, addr, &addr1, &l,
2564 true);
2565 if (l < 4 || !memory_access_is_direct(mr, true)) {
2566 io_mem_write(mr, addr1, val, 4);
2567 } else {
2568 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2569 ptr = qemu_get_ram_ptr(addr1);
2570 stl_p(ptr, val);
2572 if (unlikely(in_migration)) {
2573 if (cpu_physical_memory_is_clean(addr1)) {
2574 /* invalidate code */
2575 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2576 /* set dirty bit */
2577 cpu_physical_memory_set_dirty_flag(addr1,
2578 DIRTY_MEMORY_MIGRATION);
2579 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2585 /* warning: addr must be aligned */
2586 static inline void stl_phys_internal(AddressSpace *as,
2587 hwaddr addr, uint32_t val,
2588 enum device_endian endian)
2590 uint8_t *ptr;
2591 MemoryRegion *mr;
2592 hwaddr l = 4;
2593 hwaddr addr1;
2595 mr = address_space_translate(as, addr, &addr1, &l,
2596 true);
2597 if (l < 4 || !memory_access_is_direct(mr, true)) {
2598 #if defined(TARGET_WORDS_BIGENDIAN)
2599 if (endian == DEVICE_LITTLE_ENDIAN) {
2600 val = bswap32(val);
2602 #else
2603 if (endian == DEVICE_BIG_ENDIAN) {
2604 val = bswap32(val);
2606 #endif
2607 io_mem_write(mr, addr1, val, 4);
2608 } else {
2609 /* RAM case */
2610 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2611 ptr = qemu_get_ram_ptr(addr1);
2612 switch (endian) {
2613 case DEVICE_LITTLE_ENDIAN:
2614 stl_le_p(ptr, val);
2615 break;
2616 case DEVICE_BIG_ENDIAN:
2617 stl_be_p(ptr, val);
2618 break;
2619 default:
2620 stl_p(ptr, val);
2621 break;
2623 invalidate_and_set_dirty(addr1, 4);
2627 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2629 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2632 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2634 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2637 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2639 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2642 /* XXX: optimize */
2643 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2645 uint8_t v = val;
2646 address_space_rw(as, addr, &v, 1, 1);
2649 /* warning: addr must be aligned */
2650 static inline void stw_phys_internal(AddressSpace *as,
2651 hwaddr addr, uint32_t val,
2652 enum device_endian endian)
2654 uint8_t *ptr;
2655 MemoryRegion *mr;
2656 hwaddr l = 2;
2657 hwaddr addr1;
2659 mr = address_space_translate(as, addr, &addr1, &l, true);
2660 if (l < 2 || !memory_access_is_direct(mr, true)) {
2661 #if defined(TARGET_WORDS_BIGENDIAN)
2662 if (endian == DEVICE_LITTLE_ENDIAN) {
2663 val = bswap16(val);
2665 #else
2666 if (endian == DEVICE_BIG_ENDIAN) {
2667 val = bswap16(val);
2669 #endif
2670 io_mem_write(mr, addr1, val, 2);
2671 } else {
2672 /* RAM case */
2673 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2674 ptr = qemu_get_ram_ptr(addr1);
2675 switch (endian) {
2676 case DEVICE_LITTLE_ENDIAN:
2677 stw_le_p(ptr, val);
2678 break;
2679 case DEVICE_BIG_ENDIAN:
2680 stw_be_p(ptr, val);
2681 break;
2682 default:
2683 stw_p(ptr, val);
2684 break;
2686 invalidate_and_set_dirty(addr1, 2);
2690 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2692 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2695 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2697 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2700 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2702 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2705 /* XXX: optimize */
2706 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2708 val = tswap64(val);
2709 address_space_rw(as, addr, (void *) &val, 8, 1);
2712 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2714 val = cpu_to_le64(val);
2715 address_space_rw(as, addr, (void *) &val, 8, 1);
2718 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2720 val = cpu_to_be64(val);
2721 address_space_rw(as, addr, (void *) &val, 8, 1);
2724 /* virtual memory access for debug (includes writing to ROM) */
2725 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2726 uint8_t *buf, int len, int is_write)
2728 int l;
2729 hwaddr phys_addr;
2730 target_ulong page;
2732 while (len > 0) {
2733 page = addr & TARGET_PAGE_MASK;
2734 phys_addr = cpu_get_phys_page_debug(cpu, page);
2735 /* if no physical page mapped, return an error */
2736 if (phys_addr == -1)
2737 return -1;
2738 l = (page + TARGET_PAGE_SIZE) - addr;
2739 if (l > len)
2740 l = len;
2741 phys_addr += (addr & ~TARGET_PAGE_MASK);
2742 if (is_write) {
2743 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2744 } else {
2745 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2747 len -= l;
2748 buf += l;
2749 addr += l;
2751 return 0;
2753 #endif
2755 #if !defined(CONFIG_USER_ONLY)
/*
 * A helper function for the _utterly broken_ virtio device model to find
 * out if it's running on a big endian machine. Don't do this at home kids!
 *
 * The answer is fixed at compile time: true when TARGET_WORDS_BIGENDIAN is
 * defined, false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
2771 #endif
2773 #ifndef CONFIG_USER_ONLY
2774 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2776 MemoryRegion*mr;
2777 hwaddr l = 1;
2779 mr = address_space_translate(&address_space_memory,
2780 phys_addr, &phys_addr, &l, false);
2782 return !(memory_region_is_ram(mr) ||
2783 memory_region_is_romd(mr));
2786 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2788 RAMBlock *block;
2790 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2791 func(block->host, block->offset, block->length, opaque);
2794 #endif