Revert "virtio: don't call device on !vm_running"
[qemu/qmp-unstable.git] / exec.c
blob2794b4ba230b1fd1024e811e23d966d0b388ee81
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
78 #endif
80 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
81 /* current CPU in the current thread. It is only valid inside
82 cpu_exec() */
83 DEFINE_TLS(CPUState *, current_cpu);
84 /* 0 = Do not count executed instructions.
85 1 = Precise instruction counting.
86 2 = Adaptive rate instruction counting. */
87 int use_icount;
89 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the multi-level physical page map (6 + 26 bit-field bits). */
struct PhysPageEntry {
    /* Number of levels to skip to reach the next node (in units of L2_SIZE).
     * Zero marks a leaf.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Non-leaf (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
100 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
102 /* Size of the L2 (and L3, etc) page tables. */
103 #define ADDR_SPACE_BITS 64
105 #define P_L2_BITS 9
106 #define P_L2_SIZE (1 << P_L2_BITS)
108 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
110 typedef PhysPageEntry Node[P_L2_SIZE];
112 typedef struct PhysPageMap {
113 unsigned sections_nb;
114 unsigned sections_nb_alloc;
115 unsigned nodes_nb;
116 unsigned nodes_nb_alloc;
117 Node *nodes;
118 MemoryRegionSection *sections;
119 } PhysPageMap;
121 struct AddressSpaceDispatch {
122 /* This is a multi-level map on the physical address space.
123 * The bottom level has pointers to MemoryRegionSections.
125 PhysPageEntry phys_map;
126 PhysPageMap map;
127 AddressSpace *as;
130 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
131 typedef struct subpage_t {
132 MemoryRegion iomem;
133 AddressSpace *as;
134 hwaddr base;
135 uint16_t sub_section[TARGET_PAGE_SIZE];
136 } subpage_t;
138 #define PHYS_SECTION_UNASSIGNED 0
139 #define PHYS_SECTION_NOTDIRTY 1
140 #define PHYS_SECTION_ROM 2
141 #define PHYS_SECTION_WATCH 3
143 static void io_mem_init(void);
144 static void memory_map_init(void);
145 static void tcg_commit(MemoryListener *listener);
147 static MemoryRegion io_mem_watch;
148 #endif
150 #if !defined(CONFIG_USER_ONLY)
152 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
154 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
155 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
156 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
157 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
161 static uint32_t phys_map_node_alloc(PhysPageMap *map)
163 unsigned i;
164 uint32_t ret;
166 ret = map->nodes_nb++;
167 assert(ret != PHYS_MAP_NODE_NIL);
168 assert(ret != map->nodes_nb_alloc);
169 for (i = 0; i < P_L2_SIZE; ++i) {
170 map->nodes[ret][i].skip = 1;
171 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
173 return ret;
176 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
177 hwaddr *index, hwaddr *nb, uint16_t leaf,
178 int level)
180 PhysPageEntry *p;
181 int i;
182 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
184 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
185 lp->ptr = phys_map_node_alloc(map);
186 p = map->nodes[lp->ptr];
187 if (level == 0) {
188 for (i = 0; i < P_L2_SIZE; i++) {
189 p[i].skip = 0;
190 p[i].ptr = PHYS_SECTION_UNASSIGNED;
193 } else {
194 p = map->nodes[lp->ptr];
196 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
198 while (*nb && lp < &p[P_L2_SIZE]) {
199 if ((*index & (step - 1)) == 0 && *nb >= step) {
200 lp->skip = 0;
201 lp->ptr = leaf;
202 *index += step;
203 *nb -= step;
204 } else {
205 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
207 ++lp;
211 static void phys_page_set(AddressSpaceDispatch *d,
212 hwaddr index, hwaddr nb,
213 uint16_t leaf)
215 /* Wildly overreserve - it doesn't matter much. */
216 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
218 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
221 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
222 * and update our entry so we can skip it and go directly to the destination.
224 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
226 unsigned valid_ptr = P_L2_SIZE;
227 int valid = 0;
228 PhysPageEntry *p;
229 int i;
231 if (lp->ptr == PHYS_MAP_NODE_NIL) {
232 return;
235 p = nodes[lp->ptr];
236 for (i = 0; i < P_L2_SIZE; i++) {
237 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
238 continue;
241 valid_ptr = i;
242 valid++;
243 if (p[i].skip) {
244 phys_page_compact(&p[i], nodes, compacted);
248 /* We can only compress if there's only one child. */
249 if (valid != 1) {
250 return;
253 assert(valid_ptr < P_L2_SIZE);
255 /* Don't compress if it won't fit in the # of bits we have. */
256 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
257 return;
260 lp->ptr = p[valid_ptr].ptr;
261 if (!p[valid_ptr].skip) {
262 /* If our only child is a leaf, make this a leaf. */
263 /* By design, we should have made this node a leaf to begin with so we
264 * should never reach here.
265 * But since it's so simple to handle this, let's do it just in case we
266 * change this rule.
268 lp->skip = 0;
269 } else {
270 lp->skip += p[valid_ptr].skip;
274 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
276 DECLARE_BITMAP(compacted, nodes_nb);
278 if (d->phys_map.skip) {
279 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
283 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
284 Node *nodes, MemoryRegionSection *sections)
286 PhysPageEntry *p;
287 hwaddr index = addr >> TARGET_PAGE_BITS;
288 int i;
290 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
291 if (lp.ptr == PHYS_MAP_NODE_NIL) {
292 return &sections[PHYS_SECTION_UNASSIGNED];
294 p = nodes[lp.ptr];
295 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
298 if (sections[lp.ptr].size.hi ||
299 range_covers_byte(sections[lp.ptr].offset_within_address_space,
300 sections[lp.ptr].size.lo, addr)) {
301 return &sections[lp.ptr];
302 } else {
303 return &sections[PHYS_SECTION_UNASSIGNED];
307 bool memory_region_is_unassigned(MemoryRegion *mr)
309 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
310 && mr != &io_mem_watch;
313 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
314 hwaddr addr,
315 bool resolve_subpage)
317 MemoryRegionSection *section;
318 subpage_t *subpage;
320 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
321 if (resolve_subpage && section->mr->subpage) {
322 subpage = container_of(section->mr, subpage_t, iomem);
323 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
325 return section;
328 static MemoryRegionSection *
329 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
330 hwaddr *plen, bool resolve_subpage)
332 MemoryRegionSection *section;
333 Int128 diff;
335 section = address_space_lookup_region(d, addr, resolve_subpage);
336 /* Compute offset within MemoryRegionSection */
337 addr -= section->offset_within_address_space;
339 /* Compute offset within MemoryRegion */
340 *xlat = addr + section->offset_within_region;
342 diff = int128_sub(section->mr->size, int128_make64(addr));
343 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
344 return section;
347 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
349 if (memory_region_is_ram(mr)) {
350 return !(is_write && mr->readonly);
352 if (memory_region_is_romd(mr)) {
353 return !is_write;
356 return false;
359 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
360 hwaddr *xlat, hwaddr *plen,
361 bool is_write)
363 IOMMUTLBEntry iotlb;
364 MemoryRegionSection *section;
365 MemoryRegion *mr;
366 hwaddr len = *plen;
368 for (;;) {
369 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
370 mr = section->mr;
372 if (!mr->iommu_ops) {
373 break;
376 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
377 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
378 | (addr & iotlb.addr_mask));
379 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
380 if (!(iotlb.perm & (1 << is_write))) {
381 mr = &io_mem_unassigned;
382 break;
385 as = iotlb.target_as;
388 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
389 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
390 len = MIN(page, len);
393 *plen = len;
394 *xlat = addr;
395 return mr;
398 MemoryRegionSection *
399 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
400 hwaddr *plen)
402 MemoryRegionSection *section;
403 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
405 assert(!section->mr->iommu_ops);
406 return section;
408 #endif
410 void cpu_exec_init_all(void)
412 #if !defined(CONFIG_USER_ONLY)
413 qemu_mutex_init(&ram_list.mutex);
414 memory_map_init();
415 io_mem_init();
416 #endif
419 #if !defined(CONFIG_USER_ONLY)
421 static int cpu_common_post_load(void *opaque, int version_id)
423 CPUState *cpu = opaque;
425 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
426 version_id is increased. */
427 cpu->interrupt_request &= ~0x01;
428 tlb_flush(cpu, 1);
430 return 0;
433 static int cpu_common_pre_load(void *opaque)
435 CPUState *cpu = opaque;
437 cpu->exception_index = 0;
439 return 0;
442 static bool cpu_common_exception_index_needed(void *opaque)
444 CPUState *cpu = opaque;
446 return cpu->exception_index != 0;
449 static const VMStateDescription vmstate_cpu_common_exception_index = {
450 .name = "cpu_common/exception_index",
451 .version_id = 1,
452 .minimum_version_id = 1,
453 .fields = (VMStateField[]) {
454 VMSTATE_INT32(exception_index, CPUState),
455 VMSTATE_END_OF_LIST()
459 const VMStateDescription vmstate_cpu_common = {
460 .name = "cpu_common",
461 .version_id = 1,
462 .minimum_version_id = 1,
463 .pre_load = cpu_common_pre_load,
464 .post_load = cpu_common_post_load,
465 .fields = (VMStateField[]) {
466 VMSTATE_UINT32(halted, CPUState),
467 VMSTATE_UINT32(interrupt_request, CPUState),
468 VMSTATE_END_OF_LIST()
470 .subsections = (VMStateSubsection[]) {
472 .vmsd = &vmstate_cpu_common_exception_index,
473 .needed = cpu_common_exception_index_needed,
474 } , {
475 /* empty */
480 #endif
482 CPUState *qemu_get_cpu(int index)
484 CPUState *cpu;
486 CPU_FOREACH(cpu) {
487 if (cpu->cpu_index == index) {
488 return cpu;
492 return NULL;
495 #if !defined(CONFIG_USER_ONLY)
496 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
498 /* We only support one address space per cpu at the moment. */
499 assert(cpu->as == as);
501 if (cpu->tcg_as_listener) {
502 memory_listener_unregister(cpu->tcg_as_listener);
503 } else {
504 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
506 cpu->tcg_as_listener->commit = tcg_commit;
507 memory_listener_register(cpu->tcg_as_listener, as);
509 #endif
511 void cpu_exec_init(CPUArchState *env)
513 CPUState *cpu = ENV_GET_CPU(env);
514 CPUClass *cc = CPU_GET_CLASS(cpu);
515 CPUState *some_cpu;
516 int cpu_index;
518 #if defined(CONFIG_USER_ONLY)
519 cpu_list_lock();
520 #endif
521 cpu_index = 0;
522 CPU_FOREACH(some_cpu) {
523 cpu_index++;
525 cpu->cpu_index = cpu_index;
526 cpu->numa_node = 0;
527 QTAILQ_INIT(&cpu->breakpoints);
528 QTAILQ_INIT(&cpu->watchpoints);
529 #ifndef CONFIG_USER_ONLY
530 cpu->as = &address_space_memory;
531 cpu->thread_id = qemu_get_thread_id();
532 #endif
533 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
534 #if defined(CONFIG_USER_ONLY)
535 cpu_list_unlock();
536 #endif
537 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
538 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
540 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
541 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
542 cpu_save, cpu_load, env);
543 assert(cc->vmsd == NULL);
544 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
545 #endif
546 if (cc->vmsd != NULL) {
547 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate translated code at @pc so a breakpoint change takes effect
 * (user-only: @pc is used directly as the page address).
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/* Invalidate translated code at @pc so a breakpoint change takes effect.
 * Nothing is done when @pc has no physical mapping.
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr page = cpu_get_phys_page_debug(cpu, pc);

    if (page == -1) {
        return;
    }
    tb_invalidate_phys_addr(cpu->as, page | (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */
569 #if defined(CONFIG_USER_ONLY)
570 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
575 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
576 int flags)
578 return -ENOSYS;
581 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
585 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
586 int flags, CPUWatchpoint **watchpoint)
588 return -ENOSYS;
590 #else
591 /* Add a watchpoint. */
592 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
593 int flags, CPUWatchpoint **watchpoint)
595 CPUWatchpoint *wp;
597 /* forbid ranges which are empty or run off the end of the address space */
598 if (len == 0 || (addr + len - 1) <= addr) {
599 error_report("tried to set invalid watchpoint at %"
600 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
601 return -EINVAL;
603 wp = g_malloc(sizeof(*wp));
605 wp->vaddr = addr;
606 wp->len = len;
607 wp->flags = flags;
609 /* keep all GDB-injected watchpoints in front */
610 if (flags & BP_GDB) {
611 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
612 } else {
613 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
616 tlb_flush_page(cpu, addr);
618 if (watchpoint)
619 *watchpoint = wp;
620 return 0;
623 /* Remove a specific watchpoint. */
624 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
625 int flags)
627 CPUWatchpoint *wp;
629 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
630 if (addr == wp->vaddr && len == wp->len
631 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
632 cpu_watchpoint_remove_by_ref(cpu, wp);
633 return 0;
636 return -ENOENT;
639 /* Remove a specific watchpoint by reference. */
640 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
642 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
644 tlb_flush_page(cpu, watchpoint->vaddr);
646 g_free(watchpoint);
649 /* Remove all matching watchpoints. */
650 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
652 CPUWatchpoint *wp, *next;
654 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
655 if (wp->flags & mask) {
656 cpu_watchpoint_remove_by_ref(cpu, wp);
661 /* Return true if this watchpoint address matches the specified
662 * access (ie the address range covered by the watchpoint overlaps
663 * partially or completely with the address range covered by the
664 * access).
666 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
667 vaddr addr,
668 vaddr len)
670 /* We know the lengths are non-zero, but a little caution is
671 * required to avoid errors in the case where the range ends
672 * exactly at the top of the address space and so addr + len
673 * wraps round to zero.
675 vaddr wpend = wp->vaddr + wp->len - 1;
676 vaddr addrend = addr + len - 1;
678 return !(addr > wpend || wp->vaddr > addrend);
681 #endif
683 /* Add a breakpoint. */
684 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
685 CPUBreakpoint **breakpoint)
687 #if defined(TARGET_HAS_ICE)
688 CPUBreakpoint *bp;
690 bp = g_malloc(sizeof(*bp));
692 bp->pc = pc;
693 bp->flags = flags;
695 /* keep all GDB-injected breakpoints in front */
696 if (flags & BP_GDB) {
697 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
698 } else {
699 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
702 breakpoint_invalidate(cpu, pc);
704 if (breakpoint) {
705 *breakpoint = bp;
707 return 0;
708 #else
709 return -ENOSYS;
710 #endif
713 /* Remove a specific breakpoint. */
714 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
716 #if defined(TARGET_HAS_ICE)
717 CPUBreakpoint *bp;
719 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
720 if (bp->pc == pc && bp->flags == flags) {
721 cpu_breakpoint_remove_by_ref(cpu, bp);
722 return 0;
725 return -ENOENT;
726 #else
727 return -ENOSYS;
728 #endif
731 /* Remove a specific breakpoint by reference. */
732 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
734 #if defined(TARGET_HAS_ICE)
735 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
737 breakpoint_invalidate(cpu, breakpoint->pc);
739 g_free(breakpoint);
740 #endif
743 /* Remove all matching breakpoints. */
744 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
746 #if defined(TARGET_HAS_ICE)
747 CPUBreakpoint *bp, *next;
749 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
750 if (bp->flags & mask) {
751 cpu_breakpoint_remove_by_ref(cpu, bp);
754 #endif
757 /* enable or disable single step mode. EXCP_DEBUG is returned by the
758 CPU loop after each instruction */
759 void cpu_single_step(CPUState *cpu, int enabled)
761 #if defined(TARGET_HAS_ICE)
762 if (cpu->singlestep_enabled != enabled) {
763 cpu->singlestep_enabled = enabled;
764 if (kvm_enabled()) {
765 kvm_update_guest_debug(cpu, 0);
766 } else {
767 /* must flush all the translated code to avoid inconsistencies */
768 /* XXX: only flush what is necessary */
769 CPUArchState *env = cpu->env_ptr;
770 tb_flush(env);
773 #endif
776 void cpu_abort(CPUState *cpu, const char *fmt, ...)
778 va_list ap;
779 va_list ap2;
781 va_start(ap, fmt);
782 va_copy(ap2, ap);
783 fprintf(stderr, "qemu: fatal: ");
784 vfprintf(stderr, fmt, ap);
785 fprintf(stderr, "\n");
786 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
787 if (qemu_log_enabled()) {
788 qemu_log("qemu: fatal: ");
789 qemu_log_vprintf(fmt, ap2);
790 qemu_log("\n");
791 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
792 qemu_log_flush();
793 qemu_log_close();
795 va_end(ap2);
796 va_end(ap);
797 #if defined(CONFIG_USER_ONLY)
799 struct sigaction act;
800 sigfillset(&act.sa_mask);
801 act.sa_handler = SIG_DFL;
802 sigaction(SIGABRT, &act, NULL);
804 #endif
805 abort();
808 #if !defined(CONFIG_USER_ONLY)
809 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
811 RAMBlock *block;
813 /* The list is protected by the iothread lock here. */
814 block = ram_list.mru_block;
815 if (block && addr - block->offset < block->length) {
816 goto found;
818 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
819 if (addr - block->offset < block->length) {
820 goto found;
824 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
825 abort();
827 found:
828 ram_list.mru_block = block;
829 return block;
832 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
834 ram_addr_t start1;
835 RAMBlock *block;
836 ram_addr_t end;
838 end = TARGET_PAGE_ALIGN(start + length);
839 start &= TARGET_PAGE_MASK;
841 block = qemu_get_ram_block(start);
842 assert(block == qemu_get_ram_block(end - 1));
843 start1 = (uintptr_t)block->host + (start - block->offset);
844 cpu_tlb_reset_dirty_all(start1, length);
847 /* Note: start and end must be within the same ram block. */
848 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
849 unsigned client)
851 if (length == 0)
852 return;
853 cpu_physical_memory_clear_dirty_range(start, length, client);
855 if (tcg_enabled()) {
856 tlb_reset_dirty_range_all(start, length);
860 static void cpu_physical_memory_set_dirty_tracking(bool enable)
862 in_migration = enable;
865 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
866 MemoryRegionSection *section,
867 target_ulong vaddr,
868 hwaddr paddr, hwaddr xlat,
869 int prot,
870 target_ulong *address)
872 hwaddr iotlb;
873 CPUWatchpoint *wp;
875 if (memory_region_is_ram(section->mr)) {
876 /* Normal RAM. */
877 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
878 + xlat;
879 if (!section->readonly) {
880 iotlb |= PHYS_SECTION_NOTDIRTY;
881 } else {
882 iotlb |= PHYS_SECTION_ROM;
884 } else {
885 iotlb = section - section->address_space->dispatch->map.sections;
886 iotlb += xlat;
889 /* Make accesses to pages with watchpoints go via the
890 watchpoint trap routines. */
891 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
892 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
893 /* Avoid trapping reads of pages with a write breakpoint. */
894 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
895 iotlb = PHYS_SECTION_WATCH + paddr;
896 *address |= TLB_MMIO;
897 break;
902 return iotlb;
904 #endif /* defined(CONFIG_USER_ONLY) */
906 #if !defined(CONFIG_USER_ONLY)
908 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
909 uint16_t section);
910 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
912 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
915 * Set a custom physical guest memory alloator.
916 * Accelerators with unusual needs may need this. Hopefully, we can
917 * get rid of it eventually.
919 void phys_mem_set_alloc(void *(*alloc)(size_t))
921 phys_mem_alloc = alloc;
924 static uint16_t phys_section_add(PhysPageMap *map,
925 MemoryRegionSection *section)
927 /* The physical section number is ORed with a page-aligned
928 * pointer to produce the iotlb entries. Thus it should
929 * never overflow into the page-aligned value.
931 assert(map->sections_nb < TARGET_PAGE_SIZE);
933 if (map->sections_nb == map->sections_nb_alloc) {
934 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
935 map->sections = g_renew(MemoryRegionSection, map->sections,
936 map->sections_nb_alloc);
938 map->sections[map->sections_nb] = *section;
939 memory_region_ref(section->mr);
940 return map->sections_nb++;
943 static void phys_section_destroy(MemoryRegion *mr)
945 memory_region_unref(mr);
947 if (mr->subpage) {
948 subpage_t *subpage = container_of(mr, subpage_t, iomem);
949 object_unref(OBJECT(&subpage->iomem));
950 g_free(subpage);
954 static void phys_sections_free(PhysPageMap *map)
956 while (map->sections_nb > 0) {
957 MemoryRegionSection *section = &map->sections[--map->sections_nb];
958 phys_section_destroy(section->mr);
960 g_free(map->sections);
961 g_free(map->nodes);
964 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
966 subpage_t *subpage;
967 hwaddr base = section->offset_within_address_space
968 & TARGET_PAGE_MASK;
969 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
970 d->map.nodes, d->map.sections);
971 MemoryRegionSection subsection = {
972 .offset_within_address_space = base,
973 .size = int128_make64(TARGET_PAGE_SIZE),
975 hwaddr start, end;
977 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
979 if (!(existing->mr->subpage)) {
980 subpage = subpage_init(d->as, base);
981 subsection.address_space = d->as;
982 subsection.mr = &subpage->iomem;
983 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
984 phys_section_add(&d->map, &subsection));
985 } else {
986 subpage = container_of(existing->mr, subpage_t, iomem);
988 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
989 end = start + int128_get64(section->size) - 1;
990 subpage_register(subpage, start, end,
991 phys_section_add(&d->map, section));
995 static void register_multipage(AddressSpaceDispatch *d,
996 MemoryRegionSection *section)
998 hwaddr start_addr = section->offset_within_address_space;
999 uint16_t section_index = phys_section_add(&d->map, section);
1000 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1001 TARGET_PAGE_BITS));
1003 assert(num_pages);
1004 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1007 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1009 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1010 AddressSpaceDispatch *d = as->next_dispatch;
1011 MemoryRegionSection now = *section, remain = *section;
1012 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1014 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1015 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1016 - now.offset_within_address_space;
1018 now.size = int128_min(int128_make64(left), now.size);
1019 register_subpage(d, &now);
1020 } else {
1021 now.size = int128_zero();
1023 while (int128_ne(remain.size, now.size)) {
1024 remain.size = int128_sub(remain.size, now.size);
1025 remain.offset_within_address_space += int128_get64(now.size);
1026 remain.offset_within_region += int128_get64(now.size);
1027 now = remain;
1028 if (int128_lt(remain.size, page_size)) {
1029 register_subpage(d, &now);
1030 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1031 now.size = page_size;
1032 register_subpage(d, &now);
1033 } else {
1034 now.size = int128_and(now.size, int128_neg(page_size));
1035 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; only does anything when KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1046 void qemu_mutex_lock_ramlist(void)
1048 qemu_mutex_lock(&ram_list.mutex);
1051 void qemu_mutex_unlock_ramlist(void)
1053 qemu_mutex_unlock(&ram_list.mutex);
1056 #ifdef __linux__
1058 #include <sys/vfs.h>
1060 #define HUGETLBFS_MAGIC 0x958458f6
1062 static long gethugepagesize(const char *path)
1064 struct statfs fs;
1065 int ret;
1067 do {
1068 ret = statfs(path, &fs);
1069 } while (ret != 0 && errno == EINTR);
1071 if (ret != 0) {
1072 perror(path);
1073 return 0;
1076 if (fs.f_type != HUGETLBFS_MAGIC)
1077 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1079 return fs.f_bsize;
1082 static void *file_ram_alloc(RAMBlock *block,
1083 ram_addr_t memory,
1084 const char *path,
1085 Error **errp)
1087 char *filename;
1088 char *sanitized_name;
1089 char *c;
1090 void *area;
1091 int fd;
1092 unsigned long hpagesize;
1094 hpagesize = gethugepagesize(path);
1095 if (!hpagesize) {
1096 goto error;
1099 if (memory < hpagesize) {
1100 return NULL;
1103 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1104 error_setg(errp,
1105 "host lacks kvm mmu notifiers, -mem-path unsupported");
1106 goto error;
1109 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1110 sanitized_name = g_strdup(memory_region_name(block->mr));
1111 for (c = sanitized_name; *c != '\0'; c++) {
1112 if (*c == '/')
1113 *c = '_';
1116 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1117 sanitized_name);
1118 g_free(sanitized_name);
1120 fd = mkstemp(filename);
1121 if (fd < 0) {
1122 error_setg_errno(errp, errno,
1123 "unable to create backing store for hugepages");
1124 g_free(filename);
1125 goto error;
1127 unlink(filename);
1128 g_free(filename);
1130 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1133 * ftruncate is not supported by hugetlbfs in older
1134 * hosts, so don't bother bailing out on errors.
1135 * If anything goes wrong with it under other filesystems,
1136 * mmap will fail.
1138 if (ftruncate(fd, memory)) {
1139 perror("ftruncate");
1142 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1143 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1144 fd, 0);
1145 if (area == MAP_FAILED) {
1146 error_setg_errno(errp, errno,
1147 "unable to map backing store for hugepages");
1148 close(fd);
1149 goto error;
1152 if (mem_prealloc) {
1153 os_mem_prealloc(fd, area, memory);
1156 block->fd = fd;
1157 return area;
1159 error:
1160 if (mem_prealloc) {
1161 exit(1);
1163 return NULL;
1165 #endif
1167 static ram_addr_t find_ram_offset(ram_addr_t size)
1169 RAMBlock *block, *next_block;
1170 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1172 assert(size != 0); /* it would hand out same offset multiple times */
1174 if (QTAILQ_EMPTY(&ram_list.blocks))
1175 return 0;
1177 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1178 ram_addr_t end, next = RAM_ADDR_MAX;
1180 end = block->offset + block->length;
1182 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1183 if (next_block->offset >= end) {
1184 next = MIN(next, next_block->offset);
1187 if (next - end >= size && next - end < mingap) {
1188 offset = end;
1189 mingap = next - end;
1193 if (offset == RAM_ADDR_MAX) {
1194 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1195 (uint64_t)size);
1196 abort();
1199 return offset;
1202 ram_addr_t last_ram_offset(void)
1204 RAMBlock *block;
1205 ram_addr_t last = 0;
1207 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1208 last = MAX(last, block->offset + block->length);
1210 return last;
1213 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1215 int ret;
1217 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1218 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1219 "dump-guest-core", true)) {
1220 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1221 if (ret) {
1222 perror("qemu_madvise");
1223 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1224 "but dump_guest_core=off specified\n");
1229 static RAMBlock *find_ram_block(ram_addr_t addr)
1231 RAMBlock *block;
1233 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1234 if (block->offset == addr) {
1235 return block;
1239 return NULL;
1242 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1244 RAMBlock *new_block = find_ram_block(addr);
1245 RAMBlock *block;
1247 assert(new_block);
1248 assert(!new_block->idstr[0]);
1250 if (dev) {
1251 char *id = qdev_get_dev_path(dev);
1252 if (id) {
1253 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1254 g_free(id);
1257 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1259 /* This assumes the iothread lock is taken here too. */
1260 qemu_mutex_lock_ramlist();
1261 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1262 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1263 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1264 new_block->idstr);
1265 abort();
1268 qemu_mutex_unlock_ramlist();
1271 void qemu_ram_unset_idstr(ram_addr_t addr)
1273 RAMBlock *block = find_ram_block(addr);
1275 if (block) {
1276 memset(block->idstr, 0, sizeof(block->idstr));
1280 static int memory_try_enable_merging(void *addr, size_t len)
1282 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1283 /* disabled by the user */
1284 return 0;
1287 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1290 static ram_addr_t ram_block_add(RAMBlock *new_block)
1292 RAMBlock *block;
1293 ram_addr_t old_ram_size, new_ram_size;
1295 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1297 /* This assumes the iothread lock is taken here too. */
1298 qemu_mutex_lock_ramlist();
1299 new_block->offset = find_ram_offset(new_block->length);
1301 if (!new_block->host) {
1302 if (xen_enabled()) {
1303 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1304 } else {
1305 new_block->host = phys_mem_alloc(new_block->length);
1306 if (!new_block->host) {
1307 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1308 memory_region_name(new_block->mr), strerror(errno));
1309 exit(1);
1311 memory_try_enable_merging(new_block->host, new_block->length);
1315 /* Keep the list sorted from biggest to smallest block. */
1316 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1317 if (block->length < new_block->length) {
1318 break;
1321 if (block) {
1322 QTAILQ_INSERT_BEFORE(block, new_block, next);
1323 } else {
1324 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1326 ram_list.mru_block = NULL;
1328 ram_list.version++;
1329 qemu_mutex_unlock_ramlist();
1331 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1333 if (new_ram_size > old_ram_size) {
1334 int i;
1335 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1336 ram_list.dirty_memory[i] =
1337 bitmap_zero_extend(ram_list.dirty_memory[i],
1338 old_ram_size, new_ram_size);
1341 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1343 qemu_ram_setup_dump(new_block->host, new_block->length);
1344 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1345 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1347 if (kvm_enabled()) {
1348 kvm_setup_guest_memory(new_block->host, new_block->length);
1351 return new_block->offset;
1354 #ifdef __linux__
1355 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1356 bool share, const char *mem_path,
1357 Error **errp)
1359 RAMBlock *new_block;
1361 if (xen_enabled()) {
1362 error_setg(errp, "-mem-path not supported with Xen");
1363 return -1;
1366 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1368 * file_ram_alloc() needs to allocate just like
1369 * phys_mem_alloc, but we haven't bothered to provide
1370 * a hook there.
1372 error_setg(errp,
1373 "-mem-path not supported with this accelerator");
1374 return -1;
1377 size = TARGET_PAGE_ALIGN(size);
1378 new_block = g_malloc0(sizeof(*new_block));
1379 new_block->mr = mr;
1380 new_block->length = size;
1381 new_block->flags = share ? RAM_SHARED : 0;
1382 new_block->host = file_ram_alloc(new_block, size,
1383 mem_path, errp);
1384 if (!new_block->host) {
1385 g_free(new_block);
1386 return -1;
1389 return ram_block_add(new_block);
1391 #endif
1393 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1394 MemoryRegion *mr)
1396 RAMBlock *new_block;
1398 size = TARGET_PAGE_ALIGN(size);
1399 new_block = g_malloc0(sizeof(*new_block));
1400 new_block->mr = mr;
1401 new_block->length = size;
1402 new_block->fd = -1;
1403 new_block->host = host;
1404 if (host) {
1405 new_block->flags |= RAM_PREALLOC;
1407 return ram_block_add(new_block);
1410 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1412 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1415 void qemu_ram_free_from_ptr(ram_addr_t addr)
1417 RAMBlock *block;
1419 /* This assumes the iothread lock is taken here too. */
1420 qemu_mutex_lock_ramlist();
1421 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1422 if (addr == block->offset) {
1423 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1424 ram_list.mru_block = NULL;
1425 ram_list.version++;
1426 g_free(block);
1427 break;
1430 qemu_mutex_unlock_ramlist();
1433 void qemu_ram_free(ram_addr_t addr)
1435 RAMBlock *block;
1437 /* This assumes the iothread lock is taken here too. */
1438 qemu_mutex_lock_ramlist();
1439 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1440 if (addr == block->offset) {
1441 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1442 ram_list.mru_block = NULL;
1443 ram_list.version++;
1444 if (block->flags & RAM_PREALLOC) {
1446 } else if (xen_enabled()) {
1447 xen_invalidate_map_cache_entry(block->host);
1448 #ifndef _WIN32
1449 } else if (block->fd >= 0) {
1450 munmap(block->host, block->length);
1451 close(block->fd);
1452 #endif
1453 } else {
1454 qemu_anon_ram_free(block->host, block->length);
1456 g_free(block);
1457 break;
1460 qemu_mutex_unlock_ramlist();
1464 #ifndef _WIN32
1465 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1467 RAMBlock *block;
1468 ram_addr_t offset;
1469 int flags;
1470 void *area, *vaddr;
1472 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1473 offset = addr - block->offset;
1474 if (offset < block->length) {
1475 vaddr = block->host + offset;
1476 if (block->flags & RAM_PREALLOC) {
1478 } else if (xen_enabled()) {
1479 abort();
1480 } else {
1481 flags = MAP_FIXED;
1482 munmap(vaddr, length);
1483 if (block->fd >= 0) {
1484 flags |= (block->flags & RAM_SHARED ?
1485 MAP_SHARED : MAP_PRIVATE);
1486 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1487 flags, block->fd, offset);
1488 } else {
1490 * Remap needs to match alloc. Accelerators that
1491 * set phys_mem_alloc never remap. If they did,
1492 * we'd need a remap hook here.
1494 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1496 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1497 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1498 flags, -1, 0);
1500 if (area != vaddr) {
1501 fprintf(stderr, "Could not remap addr: "
1502 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1503 length, addr);
1504 exit(1);
1506 memory_try_enable_merging(vaddr, length);
1507 qemu_ram_setup_dump(vaddr, length);
1509 return;
1513 #endif /* !_WIN32 */
1515 int qemu_get_ram_fd(ram_addr_t addr)
1517 RAMBlock *block = qemu_get_ram_block(addr);
1519 return block->fd;
1522 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1524 RAMBlock *block = qemu_get_ram_block(addr);
1526 return block->host;
1529 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1530 With the exception of the softmmu code in this file, this should
1531 only be used for local memory (e.g. video ram) that the device owns,
1532 and knows it isn't going to access beyond the end of the block.
1534 It should not be used for general purpose DMA.
1535 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1537 void *qemu_get_ram_ptr(ram_addr_t addr)
1539 RAMBlock *block = qemu_get_ram_block(addr);
1541 if (xen_enabled()) {
1542 /* We need to check if the requested address is in the RAM
1543 * because we don't want to map the entire memory in QEMU.
1544 * In that case just map until the end of the page.
1546 if (block->offset == 0) {
1547 return xen_map_cache(addr, 0, 0);
1548 } else if (block->host == NULL) {
1549 block->host =
1550 xen_map_cache(block->offset, block->length, 1);
1553 return block->host + (addr - block->offset);
1556 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1557 * but takes a size argument */
1558 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1560 if (*size == 0) {
1561 return NULL;
1563 if (xen_enabled()) {
1564 return xen_map_cache(addr, *size, 1);
1565 } else {
1566 RAMBlock *block;
1568 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1569 if (addr - block->offset < block->length) {
1570 if (addr - block->offset + *size > block->length)
1571 *size = block->length - addr + block->offset;
1572 return block->host + (addr - block->offset);
1576 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1577 abort();
1581 /* Some of the softmmu routines need to translate from a host pointer
1582 (typically a TLB entry) back to a ram offset. */
1583 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1585 RAMBlock *block;
1586 uint8_t *host = ptr;
1588 if (xen_enabled()) {
1589 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1590 return qemu_get_ram_block(*ram_addr)->mr;
1593 block = ram_list.mru_block;
1594 if (block && block->host && host - block->host < block->length) {
1595 goto found;
1598 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1599 /* This case append when the block is not mapped. */
1600 if (block->host == NULL) {
1601 continue;
1603 if (host - block->host < block->length) {
1604 goto found;
1608 return NULL;
1610 found:
1611 *ram_addr = block->offset + (host - block->host);
1612 return block->mr;
1615 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1616 uint64_t val, unsigned size)
1618 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1619 tb_invalidate_phys_page_fast(ram_addr, size);
1621 switch (size) {
1622 case 1:
1623 stb_p(qemu_get_ram_ptr(ram_addr), val);
1624 break;
1625 case 2:
1626 stw_p(qemu_get_ram_ptr(ram_addr), val);
1627 break;
1628 case 4:
1629 stl_p(qemu_get_ram_ptr(ram_addr), val);
1630 break;
1631 default:
1632 abort();
1634 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1635 /* we remove the notdirty callback only if the code has been
1636 flushed */
1637 if (!cpu_physical_memory_is_clean(ram_addr)) {
1638 CPUArchState *env = current_cpu->env_ptr;
1639 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1643 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1644 unsigned size, bool is_write)
1646 return is_write;
1649 static const MemoryRegionOps notdirty_mem_ops = {
1650 .write = notdirty_mem_write,
1651 .valid.accepts = notdirty_mem_accepts,
1652 .endianness = DEVICE_NATIVE_ENDIAN,
1655 /* Generate a debug exception if a watchpoint has been hit. */
1656 static void check_watchpoint(int offset, int len, int flags)
1658 CPUState *cpu = current_cpu;
1659 CPUArchState *env = cpu->env_ptr;
1660 target_ulong pc, cs_base;
1661 target_ulong vaddr;
1662 CPUWatchpoint *wp;
1663 int cpu_flags;
1665 if (cpu->watchpoint_hit) {
1666 /* We re-entered the check after replacing the TB. Now raise
1667 * the debug interrupt so that is will trigger after the
1668 * current instruction. */
1669 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1670 return;
1672 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1673 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1674 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1675 && (wp->flags & flags)) {
1676 if (flags == BP_MEM_READ) {
1677 wp->flags |= BP_WATCHPOINT_HIT_READ;
1678 } else {
1679 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1681 wp->hitaddr = vaddr;
1682 if (!cpu->watchpoint_hit) {
1683 cpu->watchpoint_hit = wp;
1684 tb_check_watchpoint(cpu);
1685 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1686 cpu->exception_index = EXCP_DEBUG;
1687 cpu_loop_exit(cpu);
1688 } else {
1689 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1690 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1691 cpu_resume_from_signal(cpu, NULL);
1694 } else {
1695 wp->flags &= ~BP_WATCHPOINT_HIT;
1700 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1701 so these check for a hit then pass through to the normal out-of-line
1702 phys routines. */
1703 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1704 unsigned size)
1706 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1707 switch (size) {
1708 case 1: return ldub_phys(&address_space_memory, addr);
1709 case 2: return lduw_phys(&address_space_memory, addr);
1710 case 4: return ldl_phys(&address_space_memory, addr);
1711 default: abort();
1715 static void watch_mem_write(void *opaque, hwaddr addr,
1716 uint64_t val, unsigned size)
1718 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1719 switch (size) {
1720 case 1:
1721 stb_phys(&address_space_memory, addr, val);
1722 break;
1723 case 2:
1724 stw_phys(&address_space_memory, addr, val);
1725 break;
1726 case 4:
1727 stl_phys(&address_space_memory, addr, val);
1728 break;
1729 default: abort();
1733 static const MemoryRegionOps watch_mem_ops = {
1734 .read = watch_mem_read,
1735 .write = watch_mem_write,
1736 .endianness = DEVICE_NATIVE_ENDIAN,
1739 static uint64_t subpage_read(void *opaque, hwaddr addr,
1740 unsigned len)
1742 subpage_t *subpage = opaque;
1743 uint8_t buf[4];
1745 #if defined(DEBUG_SUBPAGE)
1746 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1747 subpage, len, addr);
1748 #endif
1749 address_space_read(subpage->as, addr + subpage->base, buf, len);
1750 switch (len) {
1751 case 1:
1752 return ldub_p(buf);
1753 case 2:
1754 return lduw_p(buf);
1755 case 4:
1756 return ldl_p(buf);
1757 default:
1758 abort();
1762 static void subpage_write(void *opaque, hwaddr addr,
1763 uint64_t value, unsigned len)
1765 subpage_t *subpage = opaque;
1766 uint8_t buf[4];
1768 #if defined(DEBUG_SUBPAGE)
1769 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1770 " value %"PRIx64"\n",
1771 __func__, subpage, len, addr, value);
1772 #endif
1773 switch (len) {
1774 case 1:
1775 stb_p(buf, value);
1776 break;
1777 case 2:
1778 stw_p(buf, value);
1779 break;
1780 case 4:
1781 stl_p(buf, value);
1782 break;
1783 default:
1784 abort();
1786 address_space_write(subpage->as, addr + subpage->base, buf, len);
1789 static bool subpage_accepts(void *opaque, hwaddr addr,
1790 unsigned len, bool is_write)
1792 subpage_t *subpage = opaque;
1793 #if defined(DEBUG_SUBPAGE)
1794 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1795 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1796 #endif
1798 return address_space_access_valid(subpage->as, addr + subpage->base,
1799 len, is_write);
1802 static const MemoryRegionOps subpage_ops = {
1803 .read = subpage_read,
1804 .write = subpage_write,
1805 .valid.accepts = subpage_accepts,
1806 .endianness = DEVICE_NATIVE_ENDIAN,
1809 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1810 uint16_t section)
1812 int idx, eidx;
1814 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1815 return -1;
1816 idx = SUBPAGE_IDX(start);
1817 eidx = SUBPAGE_IDX(end);
1818 #if defined(DEBUG_SUBPAGE)
1819 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1820 __func__, mmio, start, end, idx, eidx, section);
1821 #endif
1822 for (; idx <= eidx; idx++) {
1823 mmio->sub_section[idx] = section;
1826 return 0;
1829 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1831 subpage_t *mmio;
1833 mmio = g_malloc0(sizeof(subpage_t));
1835 mmio->as = as;
1836 mmio->base = base;
1837 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1838 NULL, TARGET_PAGE_SIZE);
1839 mmio->iomem.subpage = true;
1840 #if defined(DEBUG_SUBPAGE)
1841 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1842 mmio, base, TARGET_PAGE_SIZE);
1843 #endif
1844 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1846 return mmio;
1849 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1850 MemoryRegion *mr)
1852 assert(as);
1853 MemoryRegionSection section = {
1854 .address_space = as,
1855 .mr = mr,
1856 .offset_within_address_space = 0,
1857 .offset_within_region = 0,
1858 .size = int128_2_64(),
1861 return phys_section_add(map, &section);
1864 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1866 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1869 static void io_mem_init(void)
1871 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1872 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1873 NULL, UINT64_MAX);
1874 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1875 NULL, UINT64_MAX);
1876 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1877 NULL, UINT64_MAX);
1880 static void mem_begin(MemoryListener *listener)
1882 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1883 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1884 uint16_t n;
1886 n = dummy_section(&d->map, as, &io_mem_unassigned);
1887 assert(n == PHYS_SECTION_UNASSIGNED);
1888 n = dummy_section(&d->map, as, &io_mem_notdirty);
1889 assert(n == PHYS_SECTION_NOTDIRTY);
1890 n = dummy_section(&d->map, as, &io_mem_rom);
1891 assert(n == PHYS_SECTION_ROM);
1892 n = dummy_section(&d->map, as, &io_mem_watch);
1893 assert(n == PHYS_SECTION_WATCH);
1895 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1896 d->as = as;
1897 as->next_dispatch = d;
1900 static void mem_commit(MemoryListener *listener)
1902 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1903 AddressSpaceDispatch *cur = as->dispatch;
1904 AddressSpaceDispatch *next = as->next_dispatch;
1906 phys_page_compact_all(next, next->map.nodes_nb);
1908 as->dispatch = next;
1910 if (cur) {
1911 phys_sections_free(&cur->map);
1912 g_free(cur);
1916 static void tcg_commit(MemoryListener *listener)
1918 CPUState *cpu;
1920 /* since each CPU stores ram addresses in its TLB cache, we must
1921 reset the modified entries */
1922 /* XXX: slow ! */
1923 CPU_FOREACH(cpu) {
1924 /* FIXME: Disentangle the cpu.h circular files deps so we can
1925 directly get the right CPU from listener. */
1926 if (cpu->tcg_as_listener != listener) {
1927 continue;
1929 tlb_flush(cpu, 1);
1933 static void core_log_global_start(MemoryListener *listener)
1935 cpu_physical_memory_set_dirty_tracking(true);
1938 static void core_log_global_stop(MemoryListener *listener)
1940 cpu_physical_memory_set_dirty_tracking(false);
1943 static MemoryListener core_memory_listener = {
1944 .log_global_start = core_log_global_start,
1945 .log_global_stop = core_log_global_stop,
1946 .priority = 1,
1949 void address_space_init_dispatch(AddressSpace *as)
1951 as->dispatch = NULL;
1952 as->dispatch_listener = (MemoryListener) {
1953 .begin = mem_begin,
1954 .commit = mem_commit,
1955 .region_add = mem_add,
1956 .region_nop = mem_add,
1957 .priority = 0,
1959 memory_listener_register(&as->dispatch_listener, as);
1962 void address_space_destroy_dispatch(AddressSpace *as)
1964 AddressSpaceDispatch *d = as->dispatch;
1966 memory_listener_unregister(&as->dispatch_listener);
1967 g_free(d);
1968 as->dispatch = NULL;
1971 static void memory_map_init(void)
1973 system_memory = g_malloc(sizeof(*system_memory));
1975 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1976 address_space_init(&address_space_memory, system_memory, "memory");
1978 system_io = g_malloc(sizeof(*system_io));
1979 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1980 65536);
1981 address_space_init(&address_space_io, system_io, "I/O");
1983 memory_listener_register(&core_memory_listener, &address_space_memory);
1986 MemoryRegion *get_system_memory(void)
1988 return system_memory;
1991 MemoryRegion *get_system_io(void)
1993 return system_io;
1996 #endif /* !defined(CONFIG_USER_ONLY) */
1998 /* physical memory access (slow version, mainly for debug) */
1999 #if defined(CONFIG_USER_ONLY)
2000 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2001 uint8_t *buf, int len, int is_write)
2003 int l, flags;
2004 target_ulong page;
2005 void * p;
2007 while (len > 0) {
2008 page = addr & TARGET_PAGE_MASK;
2009 l = (page + TARGET_PAGE_SIZE) - addr;
2010 if (l > len)
2011 l = len;
2012 flags = page_get_flags(page);
2013 if (!(flags & PAGE_VALID))
2014 return -1;
2015 if (is_write) {
2016 if (!(flags & PAGE_WRITE))
2017 return -1;
2018 /* XXX: this code should not depend on lock_user */
2019 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2020 return -1;
2021 memcpy(p, buf, l);
2022 unlock_user(p, addr, l);
2023 } else {
2024 if (!(flags & PAGE_READ))
2025 return -1;
2026 /* XXX: this code should not depend on lock_user */
2027 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2028 return -1;
2029 memcpy(buf, p, l);
2030 unlock_user(p, addr, 0);
2032 len -= l;
2033 buf += l;
2034 addr += l;
2036 return 0;
2039 #else
2041 static void invalidate_and_set_dirty(hwaddr addr,
2042 hwaddr length)
2044 if (cpu_physical_memory_is_clean(addr)) {
2045 /* invalidate code */
2046 tb_invalidate_phys_page_range(addr, addr + length, 0);
2047 /* set dirty bit */
2048 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2050 xen_modified_memory(addr, length);
2053 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2055 unsigned access_size_max = mr->ops->valid.max_access_size;
2057 /* Regions are assumed to support 1-4 byte accesses unless
2058 otherwise specified. */
2059 if (access_size_max == 0) {
2060 access_size_max = 4;
2063 /* Bound the maximum access by the alignment of the address. */
2064 if (!mr->ops->impl.unaligned) {
2065 unsigned align_size_max = addr & -addr;
2066 if (align_size_max != 0 && align_size_max < access_size_max) {
2067 access_size_max = align_size_max;
2071 /* Don't attempt accesses larger than the maximum. */
2072 if (l > access_size_max) {
2073 l = access_size_max;
2075 if (l & (l - 1)) {
2076 l = 1 << (qemu_fls(l) - 1);
2079 return l;
2082 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2083 int len, bool is_write)
2085 hwaddr l;
2086 uint8_t *ptr;
2087 uint64_t val;
2088 hwaddr addr1;
2089 MemoryRegion *mr;
2090 bool error = false;
2092 while (len > 0) {
2093 l = len;
2094 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2096 if (is_write) {
2097 if (!memory_access_is_direct(mr, is_write)) {
2098 l = memory_access_size(mr, l, addr1);
2099 /* XXX: could force current_cpu to NULL to avoid
2100 potential bugs */
2101 switch (l) {
2102 case 8:
2103 /* 64 bit write access */
2104 val = ldq_p(buf);
2105 error |= io_mem_write(mr, addr1, val, 8);
2106 break;
2107 case 4:
2108 /* 32 bit write access */
2109 val = ldl_p(buf);
2110 error |= io_mem_write(mr, addr1, val, 4);
2111 break;
2112 case 2:
2113 /* 16 bit write access */
2114 val = lduw_p(buf);
2115 error |= io_mem_write(mr, addr1, val, 2);
2116 break;
2117 case 1:
2118 /* 8 bit write access */
2119 val = ldub_p(buf);
2120 error |= io_mem_write(mr, addr1, val, 1);
2121 break;
2122 default:
2123 abort();
2125 } else {
2126 addr1 += memory_region_get_ram_addr(mr);
2127 /* RAM case */
2128 ptr = qemu_get_ram_ptr(addr1);
2129 memcpy(ptr, buf, l);
2130 invalidate_and_set_dirty(addr1, l);
2132 } else {
2133 if (!memory_access_is_direct(mr, is_write)) {
2134 /* I/O case */
2135 l = memory_access_size(mr, l, addr1);
2136 switch (l) {
2137 case 8:
2138 /* 64 bit read access */
2139 error |= io_mem_read(mr, addr1, &val, 8);
2140 stq_p(buf, val);
2141 break;
2142 case 4:
2143 /* 32 bit read access */
2144 error |= io_mem_read(mr, addr1, &val, 4);
2145 stl_p(buf, val);
2146 break;
2147 case 2:
2148 /* 16 bit read access */
2149 error |= io_mem_read(mr, addr1, &val, 2);
2150 stw_p(buf, val);
2151 break;
2152 case 1:
2153 /* 8 bit read access */
2154 error |= io_mem_read(mr, addr1, &val, 1);
2155 stb_p(buf, val);
2156 break;
2157 default:
2158 abort();
2160 } else {
2161 /* RAM case */
2162 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2163 memcpy(buf, ptr, l);
2166 len -= l;
2167 buf += l;
2168 addr += l;
2171 return error;
2174 bool address_space_write(AddressSpace *as, hwaddr addr,
2175 const uint8_t *buf, int len)
2177 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2180 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2182 return address_space_rw(as, addr, buf, len, false);
2186 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2187 int len, int is_write)
2189 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE = 1,    /* flush the host icache over the range */
};
2197 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2198 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2200 hwaddr l;
2201 uint8_t *ptr;
2202 hwaddr addr1;
2203 MemoryRegion *mr;
2205 while (len > 0) {
2206 l = len;
2207 mr = address_space_translate(as, addr, &addr1, &l, true);
2209 if (!(memory_region_is_ram(mr) ||
2210 memory_region_is_romd(mr))) {
2211 /* do nothing */
2212 } else {
2213 addr1 += memory_region_get_ram_addr(mr);
2214 /* ROM/RAM case */
2215 ptr = qemu_get_ram_ptr(addr1);
2216 switch (type) {
2217 case WRITE_DATA:
2218 memcpy(ptr, buf, l);
2219 invalidate_and_set_dirty(addr1, l);
2220 break;
2221 case FLUSH_CACHE:
2222 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2223 break;
2226 len -= l;
2227 buf += l;
2228 addr += l;
2232 /* used for ROM loading : can write in RAM and ROM */
2233 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2234 const uint8_t *buf, int len)
2236 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2239 void cpu_flush_icache_range(hwaddr start, int len)
2242 * This function should do the same thing as an icache flush that was
2243 * triggered from within the guest. For TCG we are always cache coherent,
2244 * so there is no need to flush anything. For KVM / Xen we need to flush
2245 * the host's instruction cache at least.
2247 if (tcg_enabled()) {
2248 return;
2251 cpu_physical_memory_write_rom_internal(&address_space_memory,
2252 start, NULL, len, FLUSH_CACHE);
2255 typedef struct {
2256 MemoryRegion *mr;
2257 void *buffer;
2258 hwaddr addr;
2259 hwaddr len;
2260 } BounceBuffer;
2262 static BounceBuffer bounce;
2264 typedef struct MapClient {
2265 void *opaque;
2266 void (*callback)(void *opaque);
2267 QLIST_ENTRY(MapClient) link;
2268 } MapClient;
2270 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2271 = QLIST_HEAD_INITIALIZER(map_client_list);
2273 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2275 MapClient *client = g_malloc(sizeof(*client));
2277 client->opaque = opaque;
2278 client->callback = callback;
2279 QLIST_INSERT_HEAD(&map_client_list, client, link);
2280 return client;
2283 static void cpu_unregister_map_client(void *_client)
2285 MapClient *client = (MapClient *)_client;
2287 QLIST_REMOVE(client, link);
2288 g_free(client);
2291 static void cpu_notify_map_clients(void)
2293 MapClient *client;
2295 while (!QLIST_EMPTY(&map_client_list)) {
2296 client = QLIST_FIRST(&map_client_list);
2297 client->callback(client->opaque);
2298 cpu_unregister_map_client(client);
2302 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2304 MemoryRegion *mr;
2305 hwaddr l, xlat;
2307 while (len > 0) {
2308 l = len;
2309 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2310 if (!memory_access_is_direct(mr, is_write)) {
2311 l = memory_access_size(mr, l, addr);
2312 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2313 return false;
2317 len -= l;
2318 addr += l;
2320 return true;
2323 /* Map a physical memory region into a host virtual address.
2324 * May map a subset of the requested range, given by and returned in *plen.
2325 * May return NULL if resources needed to perform the mapping are exhausted.
2326 * Use only for reads OR writes - not for read-modify-write operations.
2327 * Use cpu_register_map_client() to know when retrying the map operation is
2328 * likely to succeed.
2330 void *address_space_map(AddressSpace *as,
2331 hwaddr addr,
2332 hwaddr *plen,
2333 bool is_write)
2335 hwaddr len = *plen;
2336 hwaddr done = 0;
2337 hwaddr l, xlat, base;
2338 MemoryRegion *mr, *this_mr;
2339 ram_addr_t raddr;
2341 if (len == 0) {
2342 return NULL;
2345 l = len;
2346 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2347 if (!memory_access_is_direct(mr, is_write)) {
2348 if (bounce.buffer) {
2349 return NULL;
2351 /* Avoid unbounded allocations */
2352 l = MIN(l, TARGET_PAGE_SIZE);
2353 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2354 bounce.addr = addr;
2355 bounce.len = l;
2357 memory_region_ref(mr);
2358 bounce.mr = mr;
2359 if (!is_write) {
2360 address_space_read(as, addr, bounce.buffer, l);
2363 *plen = l;
2364 return bounce.buffer;
2367 base = xlat;
2368 raddr = memory_region_get_ram_addr(mr);
2370 for (;;) {
2371 len -= l;
2372 addr += l;
2373 done += l;
2374 if (len == 0) {
2375 break;
2378 l = len;
2379 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2380 if (this_mr != mr || xlat != base + done) {
2381 break;
2385 memory_region_ref(mr);
2386 *plen = done;
2387 return qemu_ram_ptr_length(raddr + base, plen);
2390 /* Unmaps a memory region previously mapped by address_space_map().
2391 * Will also mark the memory as dirty if is_write == 1. access_len gives
2392 * the amount of memory that was actually read or written by the caller.
2394 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2395 int is_write, hwaddr access_len)
2397 if (buffer != bounce.buffer) {
2398 MemoryRegion *mr;
2399 ram_addr_t addr1;
2401 mr = qemu_ram_addr_from_host(buffer, &addr1);
2402 assert(mr != NULL);
2403 if (is_write) {
2404 invalidate_and_set_dirty(addr1, access_len);
2406 if (xen_enabled()) {
2407 xen_invalidate_map_cache_entry(buffer);
2409 memory_region_unref(mr);
2410 return;
2412 if (is_write) {
2413 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2415 qemu_vfree(bounce.buffer);
2416 bounce.buffer = NULL;
2417 memory_region_unref(bounce.mr);
2418 cpu_notify_map_clients();
2421 void *cpu_physical_memory_map(hwaddr addr,
2422 hwaddr *plen,
2423 int is_write)
2425 return address_space_map(&address_space_memory, addr, plen, is_write);
2428 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2429 int is_write, hwaddr access_len)
2431 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2434 /* warning: addr must be aligned */
2435 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2436 enum device_endian endian)
2438 uint8_t *ptr;
2439 uint64_t val;
2440 MemoryRegion *mr;
2441 hwaddr l = 4;
2442 hwaddr addr1;
2444 mr = address_space_translate(as, addr, &addr1, &l, false);
2445 if (l < 4 || !memory_access_is_direct(mr, false)) {
2446 /* I/O case */
2447 io_mem_read(mr, addr1, &val, 4);
2448 #if defined(TARGET_WORDS_BIGENDIAN)
2449 if (endian == DEVICE_LITTLE_ENDIAN) {
2450 val = bswap32(val);
2452 #else
2453 if (endian == DEVICE_BIG_ENDIAN) {
2454 val = bswap32(val);
2456 #endif
2457 } else {
2458 /* RAM case */
2459 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2460 & TARGET_PAGE_MASK)
2461 + addr1);
2462 switch (endian) {
2463 case DEVICE_LITTLE_ENDIAN:
2464 val = ldl_le_p(ptr);
2465 break;
2466 case DEVICE_BIG_ENDIAN:
2467 val = ldl_be_p(ptr);
2468 break;
2469 default:
2470 val = ldl_p(ptr);
2471 break;
2474 return val;
2477 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2479 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2482 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2484 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2487 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2489 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2492 /* warning: addr must be aligned */
2493 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2494 enum device_endian endian)
2496 uint8_t *ptr;
2497 uint64_t val;
2498 MemoryRegion *mr;
2499 hwaddr l = 8;
2500 hwaddr addr1;
2502 mr = address_space_translate(as, addr, &addr1, &l,
2503 false);
2504 if (l < 8 || !memory_access_is_direct(mr, false)) {
2505 /* I/O case */
2506 io_mem_read(mr, addr1, &val, 8);
2507 #if defined(TARGET_WORDS_BIGENDIAN)
2508 if (endian == DEVICE_LITTLE_ENDIAN) {
2509 val = bswap64(val);
2511 #else
2512 if (endian == DEVICE_BIG_ENDIAN) {
2513 val = bswap64(val);
2515 #endif
2516 } else {
2517 /* RAM case */
2518 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2519 & TARGET_PAGE_MASK)
2520 + addr1);
2521 switch (endian) {
2522 case DEVICE_LITTLE_ENDIAN:
2523 val = ldq_le_p(ptr);
2524 break;
2525 case DEVICE_BIG_ENDIAN:
2526 val = ldq_be_p(ptr);
2527 break;
2528 default:
2529 val = ldq_p(ptr);
2530 break;
2533 return val;
2536 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2538 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2541 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2543 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2546 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2548 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2551 /* XXX: optimize */
2552 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2554 uint8_t val;
2555 address_space_rw(as, addr, &val, 1, 0);
2556 return val;
2559 /* warning: addr must be aligned */
2560 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2561 enum device_endian endian)
2563 uint8_t *ptr;
2564 uint64_t val;
2565 MemoryRegion *mr;
2566 hwaddr l = 2;
2567 hwaddr addr1;
2569 mr = address_space_translate(as, addr, &addr1, &l,
2570 false);
2571 if (l < 2 || !memory_access_is_direct(mr, false)) {
2572 /* I/O case */
2573 io_mem_read(mr, addr1, &val, 2);
2574 #if defined(TARGET_WORDS_BIGENDIAN)
2575 if (endian == DEVICE_LITTLE_ENDIAN) {
2576 val = bswap16(val);
2578 #else
2579 if (endian == DEVICE_BIG_ENDIAN) {
2580 val = bswap16(val);
2582 #endif
2583 } else {
2584 /* RAM case */
2585 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2586 & TARGET_PAGE_MASK)
2587 + addr1);
2588 switch (endian) {
2589 case DEVICE_LITTLE_ENDIAN:
2590 val = lduw_le_p(ptr);
2591 break;
2592 case DEVICE_BIG_ENDIAN:
2593 val = lduw_be_p(ptr);
2594 break;
2595 default:
2596 val = lduw_p(ptr);
2597 break;
2600 return val;
2603 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2605 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2608 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2610 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2613 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2615 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2618 /* warning: addr must be aligned. The ram page is not masked as dirty
2619 and the code inside is not invalidated. It is useful if the dirty
2620 bits are used to track modified PTEs */
2621 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2623 uint8_t *ptr;
2624 MemoryRegion *mr;
2625 hwaddr l = 4;
2626 hwaddr addr1;
2628 mr = address_space_translate(as, addr, &addr1, &l,
2629 true);
2630 if (l < 4 || !memory_access_is_direct(mr, true)) {
2631 io_mem_write(mr, addr1, val, 4);
2632 } else {
2633 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2634 ptr = qemu_get_ram_ptr(addr1);
2635 stl_p(ptr, val);
2637 if (unlikely(in_migration)) {
2638 if (cpu_physical_memory_is_clean(addr1)) {
2639 /* invalidate code */
2640 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2641 /* set dirty bit */
2642 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2648 /* warning: addr must be aligned */
2649 static inline void stl_phys_internal(AddressSpace *as,
2650 hwaddr addr, uint32_t val,
2651 enum device_endian endian)
2653 uint8_t *ptr;
2654 MemoryRegion *mr;
2655 hwaddr l = 4;
2656 hwaddr addr1;
2658 mr = address_space_translate(as, addr, &addr1, &l,
2659 true);
2660 if (l < 4 || !memory_access_is_direct(mr, true)) {
2661 #if defined(TARGET_WORDS_BIGENDIAN)
2662 if (endian == DEVICE_LITTLE_ENDIAN) {
2663 val = bswap32(val);
2665 #else
2666 if (endian == DEVICE_BIG_ENDIAN) {
2667 val = bswap32(val);
2669 #endif
2670 io_mem_write(mr, addr1, val, 4);
2671 } else {
2672 /* RAM case */
2673 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2674 ptr = qemu_get_ram_ptr(addr1);
2675 switch (endian) {
2676 case DEVICE_LITTLE_ENDIAN:
2677 stl_le_p(ptr, val);
2678 break;
2679 case DEVICE_BIG_ENDIAN:
2680 stl_be_p(ptr, val);
2681 break;
2682 default:
2683 stl_p(ptr, val);
2684 break;
2686 invalidate_and_set_dirty(addr1, 4);
2690 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2692 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2695 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2697 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2700 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2702 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2705 /* XXX: optimize */
2706 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2708 uint8_t v = val;
2709 address_space_rw(as, addr, &v, 1, 1);
2712 /* warning: addr must be aligned */
2713 static inline void stw_phys_internal(AddressSpace *as,
2714 hwaddr addr, uint32_t val,
2715 enum device_endian endian)
2717 uint8_t *ptr;
2718 MemoryRegion *mr;
2719 hwaddr l = 2;
2720 hwaddr addr1;
2722 mr = address_space_translate(as, addr, &addr1, &l, true);
2723 if (l < 2 || !memory_access_is_direct(mr, true)) {
2724 #if defined(TARGET_WORDS_BIGENDIAN)
2725 if (endian == DEVICE_LITTLE_ENDIAN) {
2726 val = bswap16(val);
2728 #else
2729 if (endian == DEVICE_BIG_ENDIAN) {
2730 val = bswap16(val);
2732 #endif
2733 io_mem_write(mr, addr1, val, 2);
2734 } else {
2735 /* RAM case */
2736 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2737 ptr = qemu_get_ram_ptr(addr1);
2738 switch (endian) {
2739 case DEVICE_LITTLE_ENDIAN:
2740 stw_le_p(ptr, val);
2741 break;
2742 case DEVICE_BIG_ENDIAN:
2743 stw_be_p(ptr, val);
2744 break;
2745 default:
2746 stw_p(ptr, val);
2747 break;
2749 invalidate_and_set_dirty(addr1, 2);
2753 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2755 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2758 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2760 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2763 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2765 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2768 /* XXX: optimize */
2769 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2771 val = tswap64(val);
2772 address_space_rw(as, addr, (void *) &val, 8, 1);
2775 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2777 val = cpu_to_le64(val);
2778 address_space_rw(as, addr, (void *) &val, 8, 1);
2781 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2783 val = cpu_to_be64(val);
2784 address_space_rw(as, addr, (void *) &val, 8, 1);
2787 /* virtual memory access for debug (includes writing to ROM) */
2788 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2789 uint8_t *buf, int len, int is_write)
2791 int l;
2792 hwaddr phys_addr;
2793 target_ulong page;
2795 while (len > 0) {
2796 page = addr & TARGET_PAGE_MASK;
2797 phys_addr = cpu_get_phys_page_debug(cpu, page);
2798 /* if no physical page mapped, return an error */
2799 if (phys_addr == -1)
2800 return -1;
2801 l = (page + TARGET_PAGE_SIZE) - addr;
2802 if (l > len)
2803 l = len;
2804 phys_addr += (addr & ~TARGET_PAGE_MASK);
2805 if (is_write) {
2806 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2807 } else {
2808 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2810 len -= l;
2811 buf += l;
2812 addr += l;
2814 return 0;
2816 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN was defined at build time and
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
2832 #ifndef CONFIG_USER_ONLY
2833 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2835 MemoryRegion*mr;
2836 hwaddr l = 1;
2838 mr = address_space_translate(&address_space_memory,
2839 phys_addr, &phys_addr, &l, false);
2841 return !(memory_region_is_ram(mr) ||
2842 memory_region_is_romd(mr));
2845 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2847 RAMBlock *block;
2849 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2850 func(block->host, block->offset, block->length, opaque);
2853 #endif