Handle bi-directional communication for fd migration
[qemu/kevin.git] / exec.c
blob081818e6e897393d4e1b0fd9f42a41c5e7556065
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/*
 * Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)
83 #endif
85 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
86 /* current CPU in the current thread. It is only valid inside
87 cpu_exec() */
88 DEFINE_TLS(CPUState *, current_cpu);
89 /* 0 = Do not count executed instructions.
90 1 = Precise instruction counting.
91 2 = Adaptive rate instruction counting. */
92 int use_icount;
94 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the physical page map; both fields share a 32-bit word. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* Index into phys_sections (!skip) or phys_map_nodes (skip). */
    uint32_t ptr : 26;
};

/* All-ones value of the 26-bit ptr field, used as the "no node" marker. */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
107 /* Size of the L2 (and L3, etc) page tables. */
108 #define ADDR_SPACE_BITS 64
110 #define P_L2_BITS 9
111 #define P_L2_SIZE (1 << P_L2_BITS)
113 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
115 typedef PhysPageEntry Node[P_L2_SIZE];
117 typedef struct PhysPageMap {
118 unsigned sections_nb;
119 unsigned sections_nb_alloc;
120 unsigned nodes_nb;
121 unsigned nodes_nb_alloc;
122 Node *nodes;
123 MemoryRegionSection *sections;
124 } PhysPageMap;
126 struct AddressSpaceDispatch {
127 /* This is a multi-level map on the physical address space.
128 * The bottom level has pointers to MemoryRegionSections.
130 PhysPageEntry phys_map;
131 PhysPageMap map;
132 AddressSpace *as;
135 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
136 typedef struct subpage_t {
137 MemoryRegion iomem;
138 AddressSpace *as;
139 hwaddr base;
140 uint16_t sub_section[TARGET_PAGE_SIZE];
141 } subpage_t;
143 #define PHYS_SECTION_UNASSIGNED 0
144 #define PHYS_SECTION_NOTDIRTY 1
145 #define PHYS_SECTION_ROM 2
146 #define PHYS_SECTION_WATCH 3
148 static void io_mem_init(void);
149 static void memory_map_init(void);
150 static void tcg_commit(MemoryListener *listener);
152 static MemoryRegion io_mem_watch;
153 #endif
155 #if !defined(CONFIG_USER_ONLY)
157 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
159 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
160 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
161 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
162 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
166 static uint32_t phys_map_node_alloc(PhysPageMap *map)
168 unsigned i;
169 uint32_t ret;
171 ret = map->nodes_nb++;
172 assert(ret != PHYS_MAP_NODE_NIL);
173 assert(ret != map->nodes_nb_alloc);
174 for (i = 0; i < P_L2_SIZE; ++i) {
175 map->nodes[ret][i].skip = 1;
176 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
178 return ret;
181 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
182 hwaddr *index, hwaddr *nb, uint16_t leaf,
183 int level)
185 PhysPageEntry *p;
186 int i;
187 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
189 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
190 lp->ptr = phys_map_node_alloc(map);
191 p = map->nodes[lp->ptr];
192 if (level == 0) {
193 for (i = 0; i < P_L2_SIZE; i++) {
194 p[i].skip = 0;
195 p[i].ptr = PHYS_SECTION_UNASSIGNED;
198 } else {
199 p = map->nodes[lp->ptr];
201 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
203 while (*nb && lp < &p[P_L2_SIZE]) {
204 if ((*index & (step - 1)) == 0 && *nb >= step) {
205 lp->skip = 0;
206 lp->ptr = leaf;
207 *index += step;
208 *nb -= step;
209 } else {
210 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
212 ++lp;
216 static void phys_page_set(AddressSpaceDispatch *d,
217 hwaddr index, hwaddr nb,
218 uint16_t leaf)
220 /* Wildly overreserve - it doesn't matter much. */
221 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
223 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
226 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
227 * and update our entry so we can skip it and go directly to the destination.
229 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
231 unsigned valid_ptr = P_L2_SIZE;
232 int valid = 0;
233 PhysPageEntry *p;
234 int i;
236 if (lp->ptr == PHYS_MAP_NODE_NIL) {
237 return;
240 p = nodes[lp->ptr];
241 for (i = 0; i < P_L2_SIZE; i++) {
242 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
243 continue;
246 valid_ptr = i;
247 valid++;
248 if (p[i].skip) {
249 phys_page_compact(&p[i], nodes, compacted);
253 /* We can only compress if there's only one child. */
254 if (valid != 1) {
255 return;
258 assert(valid_ptr < P_L2_SIZE);
260 /* Don't compress if it won't fit in the # of bits we have. */
261 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
262 return;
265 lp->ptr = p[valid_ptr].ptr;
266 if (!p[valid_ptr].skip) {
267 /* If our only child is a leaf, make this a leaf. */
268 /* By design, we should have made this node a leaf to begin with so we
269 * should never reach here.
270 * But since it's so simple to handle this, let's do it just in case we
271 * change this rule.
273 lp->skip = 0;
274 } else {
275 lp->skip += p[valid_ptr].skip;
279 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
281 DECLARE_BITMAP(compacted, nodes_nb);
283 if (d->phys_map.skip) {
284 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
288 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
289 Node *nodes, MemoryRegionSection *sections)
291 PhysPageEntry *p;
292 hwaddr index = addr >> TARGET_PAGE_BITS;
293 int i;
295 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
296 if (lp.ptr == PHYS_MAP_NODE_NIL) {
297 return &sections[PHYS_SECTION_UNASSIGNED];
299 p = nodes[lp.ptr];
300 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
303 if (sections[lp.ptr].size.hi ||
304 range_covers_byte(sections[lp.ptr].offset_within_address_space,
305 sections[lp.ptr].size.lo, addr)) {
306 return &sections[lp.ptr];
307 } else {
308 return &sections[PHYS_SECTION_UNASSIGNED];
312 bool memory_region_is_unassigned(MemoryRegion *mr)
314 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
315 && mr != &io_mem_watch;
318 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
319 hwaddr addr,
320 bool resolve_subpage)
322 MemoryRegionSection *section;
323 subpage_t *subpage;
325 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
326 if (resolve_subpage && section->mr->subpage) {
327 subpage = container_of(section->mr, subpage_t, iomem);
328 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
330 return section;
333 static MemoryRegionSection *
334 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
335 hwaddr *plen, bool resolve_subpage)
337 MemoryRegionSection *section;
338 Int128 diff;
340 section = address_space_lookup_region(d, addr, resolve_subpage);
341 /* Compute offset within MemoryRegionSection */
342 addr -= section->offset_within_address_space;
344 /* Compute offset within MemoryRegion */
345 *xlat = addr + section->offset_within_region;
347 diff = int128_sub(section->mr->size, int128_make64(addr));
348 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
349 return section;
352 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
354 if (memory_region_is_ram(mr)) {
355 return !(is_write && mr->readonly);
357 if (memory_region_is_romd(mr)) {
358 return !is_write;
361 return false;
364 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
365 hwaddr *xlat, hwaddr *plen,
366 bool is_write)
368 IOMMUTLBEntry iotlb;
369 MemoryRegionSection *section;
370 MemoryRegion *mr;
371 hwaddr len = *plen;
373 for (;;) {
374 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
375 mr = section->mr;
377 if (!mr->iommu_ops) {
378 break;
381 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
382 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
383 | (addr & iotlb.addr_mask));
384 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
385 if (!(iotlb.perm & (1 << is_write))) {
386 mr = &io_mem_unassigned;
387 break;
390 as = iotlb.target_as;
393 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
394 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
395 len = MIN(page, len);
398 *plen = len;
399 *xlat = addr;
400 return mr;
403 MemoryRegionSection *
404 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
405 hwaddr *plen)
407 MemoryRegionSection *section;
408 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
410 assert(!section->mr->iommu_ops);
411 return section;
413 #endif
415 void cpu_exec_init_all(void)
417 #if !defined(CONFIG_USER_ONLY)
418 qemu_mutex_init(&ram_list.mutex);
419 memory_map_init();
420 io_mem_init();
421 #endif
424 #if !defined(CONFIG_USER_ONLY)
426 static int cpu_common_post_load(void *opaque, int version_id)
428 CPUState *cpu = opaque;
430 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
431 version_id is increased. */
432 cpu->interrupt_request &= ~0x01;
433 tlb_flush(cpu, 1);
435 return 0;
438 static int cpu_common_pre_load(void *opaque)
440 CPUState *cpu = opaque;
442 cpu->exception_index = -1;
444 return 0;
447 static bool cpu_common_exception_index_needed(void *opaque)
449 CPUState *cpu = opaque;
451 return tcg_enabled() && cpu->exception_index != -1;
454 static const VMStateDescription vmstate_cpu_common_exception_index = {
455 .name = "cpu_common/exception_index",
456 .version_id = 1,
457 .minimum_version_id = 1,
458 .fields = (VMStateField[]) {
459 VMSTATE_INT32(exception_index, CPUState),
460 VMSTATE_END_OF_LIST()
464 const VMStateDescription vmstate_cpu_common = {
465 .name = "cpu_common",
466 .version_id = 1,
467 .minimum_version_id = 1,
468 .pre_load = cpu_common_pre_load,
469 .post_load = cpu_common_post_load,
470 .fields = (VMStateField[]) {
471 VMSTATE_UINT32(halted, CPUState),
472 VMSTATE_UINT32(interrupt_request, CPUState),
473 VMSTATE_END_OF_LIST()
475 .subsections = (VMStateSubsection[]) {
477 .vmsd = &vmstate_cpu_common_exception_index,
478 .needed = cpu_common_exception_index_needed,
479 } , {
480 /* empty */
485 #endif
487 CPUState *qemu_get_cpu(int index)
489 CPUState *cpu;
491 CPU_FOREACH(cpu) {
492 if (cpu->cpu_index == index) {
493 return cpu;
497 return NULL;
500 #if !defined(CONFIG_USER_ONLY)
501 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
503 /* We only support one address space per cpu at the moment. */
504 assert(cpu->as == as);
506 if (cpu->tcg_as_listener) {
507 memory_listener_unregister(cpu->tcg_as_listener);
508 } else {
509 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
511 cpu->tcg_as_listener->commit = tcg_commit;
512 memory_listener_register(cpu->tcg_as_listener, as);
514 #endif
516 void cpu_exec_init(CPUArchState *env)
518 CPUState *cpu = ENV_GET_CPU(env);
519 CPUClass *cc = CPU_GET_CLASS(cpu);
520 CPUState *some_cpu;
521 int cpu_index;
523 #if defined(CONFIG_USER_ONLY)
524 cpu_list_lock();
525 #endif
526 cpu_index = 0;
527 CPU_FOREACH(some_cpu) {
528 cpu_index++;
530 cpu->cpu_index = cpu_index;
531 cpu->numa_node = 0;
532 QTAILQ_INIT(&cpu->breakpoints);
533 QTAILQ_INIT(&cpu->watchpoints);
534 #ifndef CONFIG_USER_ONLY
535 cpu->as = &address_space_memory;
536 cpu->thread_id = qemu_get_thread_id();
537 #endif
538 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
539 #if defined(CONFIG_USER_ONLY)
540 cpu_list_unlock();
541 #endif
542 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
543 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
545 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
546 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
547 cpu_save, cpu_load, env);
548 assert(cc->vmsd == NULL);
549 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
550 #endif
551 if (cc->vmsd != NULL) {
552 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
#if defined(TARGET_HAS_ICE)
/*
 * Invalidate translated code at @pc.
 *
 * User-only builds invalidate the one-byte range [pc, pc + 1) directly.
 * Other builds first ask cpu_get_phys_page_debug() for the physical page
 * and do nothing if it returns -1.
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
#if defined(CONFIG_USER_ONLY)
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
#else
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);

    if (phys == -1) {
        return;
    }
    tb_invalidate_phys_addr(cpu->as, phys | (pc & ~TARGET_PAGE_MASK));
#endif
}
#endif /* TARGET_HAS_ICE */
574 #if defined(CONFIG_USER_ONLY)
575 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
580 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
581 int flags)
583 return -ENOSYS;
586 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
590 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
591 int flags, CPUWatchpoint **watchpoint)
593 return -ENOSYS;
595 #else
596 /* Add a watchpoint. */
597 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
598 int flags, CPUWatchpoint **watchpoint)
600 CPUWatchpoint *wp;
602 /* forbid ranges which are empty or run off the end of the address space */
603 if (len == 0 || (addr + len - 1) < addr) {
604 error_report("tried to set invalid watchpoint at %"
605 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
606 return -EINVAL;
608 wp = g_malloc(sizeof(*wp));
610 wp->vaddr = addr;
611 wp->len = len;
612 wp->flags = flags;
614 /* keep all GDB-injected watchpoints in front */
615 if (flags & BP_GDB) {
616 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
617 } else {
618 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
621 tlb_flush_page(cpu, addr);
623 if (watchpoint)
624 *watchpoint = wp;
625 return 0;
628 /* Remove a specific watchpoint. */
629 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
630 int flags)
632 CPUWatchpoint *wp;
634 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
635 if (addr == wp->vaddr && len == wp->len
636 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
637 cpu_watchpoint_remove_by_ref(cpu, wp);
638 return 0;
641 return -ENOENT;
644 /* Remove a specific watchpoint by reference. */
645 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
647 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
649 tlb_flush_page(cpu, watchpoint->vaddr);
651 g_free(watchpoint);
654 /* Remove all matching watchpoints. */
655 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
657 CPUWatchpoint *wp, *next;
659 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
660 if (wp->flags & mask) {
661 cpu_watchpoint_remove_by_ref(cpu, wp);
666 /* Return true if this watchpoint address matches the specified
667 * access (ie the address range covered by the watchpoint overlaps
668 * partially or completely with the address range covered by the
669 * access).
671 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
672 vaddr addr,
673 vaddr len)
675 /* We know the lengths are non-zero, but a little caution is
676 * required to avoid errors in the case where the range ends
677 * exactly at the top of the address space and so addr + len
678 * wraps round to zero.
680 vaddr wpend = wp->vaddr + wp->len - 1;
681 vaddr addrend = addr + len - 1;
683 return !(addr > wpend || wp->vaddr > addrend);
686 #endif
688 /* Add a breakpoint. */
689 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
690 CPUBreakpoint **breakpoint)
692 #if defined(TARGET_HAS_ICE)
693 CPUBreakpoint *bp;
695 bp = g_malloc(sizeof(*bp));
697 bp->pc = pc;
698 bp->flags = flags;
700 /* keep all GDB-injected breakpoints in front */
701 if (flags & BP_GDB) {
702 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
703 } else {
704 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
707 breakpoint_invalidate(cpu, pc);
709 if (breakpoint) {
710 *breakpoint = bp;
712 return 0;
713 #else
714 return -ENOSYS;
715 #endif
718 /* Remove a specific breakpoint. */
719 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
721 #if defined(TARGET_HAS_ICE)
722 CPUBreakpoint *bp;
724 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
725 if (bp->pc == pc && bp->flags == flags) {
726 cpu_breakpoint_remove_by_ref(cpu, bp);
727 return 0;
730 return -ENOENT;
731 #else
732 return -ENOSYS;
733 #endif
736 /* Remove a specific breakpoint by reference. */
737 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
739 #if defined(TARGET_HAS_ICE)
740 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
742 breakpoint_invalidate(cpu, breakpoint->pc);
744 g_free(breakpoint);
745 #endif
748 /* Remove all matching breakpoints. */
749 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
751 #if defined(TARGET_HAS_ICE)
752 CPUBreakpoint *bp, *next;
754 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
755 if (bp->flags & mask) {
756 cpu_breakpoint_remove_by_ref(cpu, bp);
759 #endif
762 /* enable or disable single step mode. EXCP_DEBUG is returned by the
763 CPU loop after each instruction */
764 void cpu_single_step(CPUState *cpu, int enabled)
766 #if defined(TARGET_HAS_ICE)
767 if (cpu->singlestep_enabled != enabled) {
768 cpu->singlestep_enabled = enabled;
769 if (kvm_enabled()) {
770 kvm_update_guest_debug(cpu, 0);
771 } else {
772 /* must flush all the translated code to avoid inconsistencies */
773 /* XXX: only flush what is necessary */
774 CPUArchState *env = cpu->env_ptr;
775 tb_flush(env);
778 #endif
781 void cpu_abort(CPUState *cpu, const char *fmt, ...)
783 va_list ap;
784 va_list ap2;
786 va_start(ap, fmt);
787 va_copy(ap2, ap);
788 fprintf(stderr, "qemu: fatal: ");
789 vfprintf(stderr, fmt, ap);
790 fprintf(stderr, "\n");
791 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
792 if (qemu_log_enabled()) {
793 qemu_log("qemu: fatal: ");
794 qemu_log_vprintf(fmt, ap2);
795 qemu_log("\n");
796 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
797 qemu_log_flush();
798 qemu_log_close();
800 va_end(ap2);
801 va_end(ap);
802 #if defined(CONFIG_USER_ONLY)
804 struct sigaction act;
805 sigfillset(&act.sa_mask);
806 act.sa_handler = SIG_DFL;
807 sigaction(SIGABRT, &act, NULL);
809 #endif
810 abort();
813 #if !defined(CONFIG_USER_ONLY)
814 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
816 RAMBlock *block;
818 /* The list is protected by the iothread lock here. */
819 block = ram_list.mru_block;
820 if (block && addr - block->offset < block->max_length) {
821 goto found;
823 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
824 if (addr - block->offset < block->max_length) {
825 goto found;
829 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
830 abort();
832 found:
833 ram_list.mru_block = block;
834 return block;
837 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
839 ram_addr_t start1;
840 RAMBlock *block;
841 ram_addr_t end;
843 end = TARGET_PAGE_ALIGN(start + length);
844 start &= TARGET_PAGE_MASK;
846 block = qemu_get_ram_block(start);
847 assert(block == qemu_get_ram_block(end - 1));
848 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
849 cpu_tlb_reset_dirty_all(start1, length);
852 /* Note: start and end must be within the same ram block. */
853 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
854 unsigned client)
856 if (length == 0)
857 return;
858 cpu_physical_memory_clear_dirty_range_type(start, length, client);
860 if (tcg_enabled()) {
861 tlb_reset_dirty_range_all(start, length);
865 static void cpu_physical_memory_set_dirty_tracking(bool enable)
867 in_migration = enable;
870 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
871 MemoryRegionSection *section,
872 target_ulong vaddr,
873 hwaddr paddr, hwaddr xlat,
874 int prot,
875 target_ulong *address)
877 hwaddr iotlb;
878 CPUWatchpoint *wp;
880 if (memory_region_is_ram(section->mr)) {
881 /* Normal RAM. */
882 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
883 + xlat;
884 if (!section->readonly) {
885 iotlb |= PHYS_SECTION_NOTDIRTY;
886 } else {
887 iotlb |= PHYS_SECTION_ROM;
889 } else {
890 iotlb = section - section->address_space->dispatch->map.sections;
891 iotlb += xlat;
894 /* Make accesses to pages with watchpoints go via the
895 watchpoint trap routines. */
896 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
897 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
898 /* Avoid trapping reads of pages with a write breakpoint. */
899 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
900 iotlb = PHYS_SECTION_WATCH + paddr;
901 *address |= TLB_MMIO;
902 break;
907 return iotlb;
#endif /* !defined(CONFIG_USER_ONLY) */
911 #if !defined(CONFIG_USER_ONLY)
913 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
914 uint16_t section);
915 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
917 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
918 qemu_anon_ram_alloc;
921 * Set a custom physical guest memory alloator.
922 * Accelerators with unusual needs may need this. Hopefully, we can
923 * get rid of it eventually.
925 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
927 phys_mem_alloc = alloc;
930 static uint16_t phys_section_add(PhysPageMap *map,
931 MemoryRegionSection *section)
933 /* The physical section number is ORed with a page-aligned
934 * pointer to produce the iotlb entries. Thus it should
935 * never overflow into the page-aligned value.
937 assert(map->sections_nb < TARGET_PAGE_SIZE);
939 if (map->sections_nb == map->sections_nb_alloc) {
940 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
941 map->sections = g_renew(MemoryRegionSection, map->sections,
942 map->sections_nb_alloc);
944 map->sections[map->sections_nb] = *section;
945 memory_region_ref(section->mr);
946 return map->sections_nb++;
949 static void phys_section_destroy(MemoryRegion *mr)
951 memory_region_unref(mr);
953 if (mr->subpage) {
954 subpage_t *subpage = container_of(mr, subpage_t, iomem);
955 object_unref(OBJECT(&subpage->iomem));
956 g_free(subpage);
960 static void phys_sections_free(PhysPageMap *map)
962 while (map->sections_nb > 0) {
963 MemoryRegionSection *section = &map->sections[--map->sections_nb];
964 phys_section_destroy(section->mr);
966 g_free(map->sections);
967 g_free(map->nodes);
970 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
972 subpage_t *subpage;
973 hwaddr base = section->offset_within_address_space
974 & TARGET_PAGE_MASK;
975 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
976 d->map.nodes, d->map.sections);
977 MemoryRegionSection subsection = {
978 .offset_within_address_space = base,
979 .size = int128_make64(TARGET_PAGE_SIZE),
981 hwaddr start, end;
983 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
985 if (!(existing->mr->subpage)) {
986 subpage = subpage_init(d->as, base);
987 subsection.address_space = d->as;
988 subsection.mr = &subpage->iomem;
989 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
990 phys_section_add(&d->map, &subsection));
991 } else {
992 subpage = container_of(existing->mr, subpage_t, iomem);
994 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
995 end = start + int128_get64(section->size) - 1;
996 subpage_register(subpage, start, end,
997 phys_section_add(&d->map, section));
1001 static void register_multipage(AddressSpaceDispatch *d,
1002 MemoryRegionSection *section)
1004 hwaddr start_addr = section->offset_within_address_space;
1005 uint16_t section_index = phys_section_add(&d->map, section);
1006 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1007 TARGET_PAGE_BITS));
1009 assert(num_pages);
1010 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1013 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1015 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1016 AddressSpaceDispatch *d = as->next_dispatch;
1017 MemoryRegionSection now = *section, remain = *section;
1018 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1020 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1021 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1022 - now.offset_within_address_space;
1024 now.size = int128_min(int128_make64(left), now.size);
1025 register_subpage(d, &now);
1026 } else {
1027 now.size = int128_zero();
1029 while (int128_ne(remain.size, now.size)) {
1030 remain.size = int128_sub(remain.size, now.size);
1031 remain.offset_within_address_space += int128_get64(now.size);
1032 remain.offset_within_region += int128_get64(now.size);
1033 now = remain;
1034 if (int128_lt(remain.size, page_size)) {
1035 register_subpage(d, &now);
1036 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1037 now.size = page_size;
1038 register_subpage(d, &now);
1039 } else {
1040 now.size = int128_and(now.size, int128_neg(page_size));
1041 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing unless KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1052 void qemu_mutex_lock_ramlist(void)
1054 qemu_mutex_lock(&ram_list.mutex);
1057 void qemu_mutex_unlock_ramlist(void)
1059 qemu_mutex_unlock(&ram_list.mutex);
1062 #ifdef __linux__
1064 #include <sys/vfs.h>
1066 #define HUGETLBFS_MAGIC 0x958458f6
1068 static long gethugepagesize(const char *path, Error **errp)
1070 struct statfs fs;
1071 int ret;
1073 do {
1074 ret = statfs(path, &fs);
1075 } while (ret != 0 && errno == EINTR);
1077 if (ret != 0) {
1078 error_setg_errno(errp, errno, "failed to get page size of file %s",
1079 path);
1080 return 0;
1083 if (fs.f_type != HUGETLBFS_MAGIC)
1084 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1086 return fs.f_bsize;
1089 static void *file_ram_alloc(RAMBlock *block,
1090 ram_addr_t memory,
1091 const char *path,
1092 Error **errp)
1094 char *filename;
1095 char *sanitized_name;
1096 char *c;
1097 void *area = NULL;
1098 int fd;
1099 uint64_t hpagesize;
1100 Error *local_err = NULL;
1102 hpagesize = gethugepagesize(path, &local_err);
1103 if (local_err) {
1104 error_propagate(errp, local_err);
1105 goto error;
1107 block->mr->align = hpagesize;
1109 if (memory < hpagesize) {
1110 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1111 "or larger than huge page size 0x%" PRIx64,
1112 memory, hpagesize);
1113 goto error;
1116 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1117 error_setg(errp,
1118 "host lacks kvm mmu notifiers, -mem-path unsupported");
1119 goto error;
1122 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1123 sanitized_name = g_strdup(memory_region_name(block->mr));
1124 for (c = sanitized_name; *c != '\0'; c++) {
1125 if (*c == '/')
1126 *c = '_';
1129 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1130 sanitized_name);
1131 g_free(sanitized_name);
1133 fd = mkstemp(filename);
1134 if (fd < 0) {
1135 error_setg_errno(errp, errno,
1136 "unable to create backing store for hugepages");
1137 g_free(filename);
1138 goto error;
1140 unlink(filename);
1141 g_free(filename);
1143 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1146 * ftruncate is not supported by hugetlbfs in older
1147 * hosts, so don't bother bailing out on errors.
1148 * If anything goes wrong with it under other filesystems,
1149 * mmap will fail.
1151 if (ftruncate(fd, memory)) {
1152 perror("ftruncate");
1155 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1156 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1157 fd, 0);
1158 if (area == MAP_FAILED) {
1159 error_setg_errno(errp, errno,
1160 "unable to map backing store for hugepages");
1161 close(fd);
1162 goto error;
1165 if (mem_prealloc) {
1166 os_mem_prealloc(fd, area, memory);
1169 block->fd = fd;
1170 return area;
1172 error:
1173 if (mem_prealloc) {
1174 error_report("%s\n", error_get_pretty(*errp));
1175 exit(1);
1177 return NULL;
1179 #endif
1181 static ram_addr_t find_ram_offset(ram_addr_t size)
1183 RAMBlock *block, *next_block;
1184 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1186 assert(size != 0); /* it would hand out same offset multiple times */
1188 if (QTAILQ_EMPTY(&ram_list.blocks))
1189 return 0;
1191 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1192 ram_addr_t end, next = RAM_ADDR_MAX;
1194 end = block->offset + block->max_length;
1196 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1197 if (next_block->offset >= end) {
1198 next = MIN(next, next_block->offset);
1201 if (next - end >= size && next - end < mingap) {
1202 offset = end;
1203 mingap = next - end;
1207 if (offset == RAM_ADDR_MAX) {
1208 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1209 (uint64_t)size);
1210 abort();
1213 return offset;
1216 ram_addr_t last_ram_offset(void)
1218 RAMBlock *block;
1219 ram_addr_t last = 0;
1221 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1222 last = MAX(last, block->offset + block->max_length);
1224 return last;
1227 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1229 int ret;
1231 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1232 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1233 "dump-guest-core", true)) {
1234 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1235 if (ret) {
1236 perror("qemu_madvise");
1237 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1238 "but dump_guest_core=off specified\n");
1243 static RAMBlock *find_ram_block(ram_addr_t addr)
1245 RAMBlock *block;
1247 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1248 if (block->offset == addr) {
1249 return block;
1253 return NULL;
1256 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1258 RAMBlock *new_block = find_ram_block(addr);
1259 RAMBlock *block;
1261 assert(new_block);
1262 assert(!new_block->idstr[0]);
1264 if (dev) {
1265 char *id = qdev_get_dev_path(dev);
1266 if (id) {
1267 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1268 g_free(id);
1271 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1273 /* This assumes the iothread lock is taken here too. */
1274 qemu_mutex_lock_ramlist();
1275 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1276 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1277 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1278 new_block->idstr);
1279 abort();
1282 qemu_mutex_unlock_ramlist();
1285 void qemu_ram_unset_idstr(ram_addr_t addr)
1287 RAMBlock *block = find_ram_block(addr);
1289 if (block) {
1290 memset(block->idstr, 0, sizeof(block->idstr));
1294 static int memory_try_enable_merging(void *addr, size_t len)
1296 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1297 /* disabled by the user */
1298 return 0;
1301 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1304 /* Only legal before guest might have detected the memory size: e.g. on
1305 * incoming migration, or right after reset.
1307 * As memory core doesn't know how is memory accessed, it is up to
1308 * resize callback to update device state and/or add assertions to detect
1309 * misuse, if necessary.
1311 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1313 RAMBlock *block = find_ram_block(base);
1315 assert(block);
1317 if (block->used_length == newsize) {
1318 return 0;
1321 if (!(block->flags & RAM_RESIZEABLE)) {
1322 error_setg_errno(errp, EINVAL,
1323 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1324 " in != 0x" RAM_ADDR_FMT, block->idstr,
1325 newsize, block->used_length);
1326 return -EINVAL;
1329 if (block->max_length < newsize) {
1330 error_setg_errno(errp, EINVAL,
1331 "Length too large: %s: 0x" RAM_ADDR_FMT
1332 " > 0x" RAM_ADDR_FMT, block->idstr,
1333 newsize, block->max_length);
1334 return -EINVAL;
1337 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1338 block->used_length = newsize;
1339 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1340 memory_region_set_size(block->mr, newsize);
1341 if (block->resized) {
1342 block->resized(block->idstr, newsize, block->host);
1344 return 0;
1347 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1349 RAMBlock *block;
1350 ram_addr_t old_ram_size, new_ram_size;
1352 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1354 /* This assumes the iothread lock is taken here too. */
1355 qemu_mutex_lock_ramlist();
1356 new_block->offset = find_ram_offset(new_block->max_length);
1358 if (!new_block->host) {
1359 if (xen_enabled()) {
1360 xen_ram_alloc(new_block->offset, new_block->max_length,
1361 new_block->mr);
1362 } else {
1363 new_block->host = phys_mem_alloc(new_block->max_length,
1364 &new_block->mr->align);
1365 if (!new_block->host) {
1366 error_setg_errno(errp, errno,
1367 "cannot set up guest memory '%s'",
1368 memory_region_name(new_block->mr));
1369 qemu_mutex_unlock_ramlist();
1370 return -1;
1372 memory_try_enable_merging(new_block->host, new_block->max_length);
1376 /* Keep the list sorted from biggest to smallest block. */
1377 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1378 if (block->max_length < new_block->max_length) {
1379 break;
1382 if (block) {
1383 QTAILQ_INSERT_BEFORE(block, new_block, next);
1384 } else {
1385 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1387 ram_list.mru_block = NULL;
1389 ram_list.version++;
1390 qemu_mutex_unlock_ramlist();
1392 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1394 if (new_ram_size > old_ram_size) {
1395 int i;
1396 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1397 ram_list.dirty_memory[i] =
1398 bitmap_zero_extend(ram_list.dirty_memory[i],
1399 old_ram_size, new_ram_size);
1402 cpu_physical_memory_set_dirty_range(new_block->offset,
1403 new_block->used_length);
1405 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1406 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1407 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1409 if (kvm_enabled()) {
1410 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1413 return new_block->offset;
1416 #ifdef __linux__
1417 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1418 bool share, const char *mem_path,
1419 Error **errp)
1421 RAMBlock *new_block;
1422 ram_addr_t addr;
1423 Error *local_err = NULL;
1425 if (xen_enabled()) {
1426 error_setg(errp, "-mem-path not supported with Xen");
1427 return -1;
1430 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1432 * file_ram_alloc() needs to allocate just like
1433 * phys_mem_alloc, but we haven't bothered to provide
1434 * a hook there.
1436 error_setg(errp,
1437 "-mem-path not supported with this accelerator");
1438 return -1;
1441 size = TARGET_PAGE_ALIGN(size);
1442 new_block = g_malloc0(sizeof(*new_block));
1443 new_block->mr = mr;
1444 new_block->used_length = size;
1445 new_block->max_length = size;
1446 new_block->flags = share ? RAM_SHARED : 0;
1447 new_block->host = file_ram_alloc(new_block, size,
1448 mem_path, errp);
1449 if (!new_block->host) {
1450 g_free(new_block);
1451 return -1;
1454 addr = ram_block_add(new_block, &local_err);
1455 if (local_err) {
1456 g_free(new_block);
1457 error_propagate(errp, local_err);
1458 return -1;
1460 return addr;
1462 #endif
1464 static
1465 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1466 void (*resized)(const char*,
1467 uint64_t length,
1468 void *host),
1469 void *host, bool resizeable,
1470 MemoryRegion *mr, Error **errp)
1472 RAMBlock *new_block;
1473 ram_addr_t addr;
1474 Error *local_err = NULL;
1476 size = TARGET_PAGE_ALIGN(size);
1477 max_size = TARGET_PAGE_ALIGN(max_size);
1478 new_block = g_malloc0(sizeof(*new_block));
1479 new_block->mr = mr;
1480 new_block->resized = resized;
1481 new_block->used_length = size;
1482 new_block->max_length = max_size;
1483 assert(max_size >= size);
1484 new_block->fd = -1;
1485 new_block->host = host;
1486 if (host) {
1487 new_block->flags |= RAM_PREALLOC;
1489 if (resizeable) {
1490 new_block->flags |= RAM_RESIZEABLE;
1492 addr = ram_block_add(new_block, &local_err);
1493 if (local_err) {
1494 g_free(new_block);
1495 error_propagate(errp, local_err);
1496 return -1;
1498 return addr;
1501 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1502 MemoryRegion *mr, Error **errp)
1504 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1507 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1509 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1512 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1513 void (*resized)(const char*,
1514 uint64_t length,
1515 void *host),
1516 MemoryRegion *mr, Error **errp)
1518 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1521 void qemu_ram_free_from_ptr(ram_addr_t addr)
1523 RAMBlock *block;
1525 /* This assumes the iothread lock is taken here too. */
1526 qemu_mutex_lock_ramlist();
1527 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1528 if (addr == block->offset) {
1529 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1530 ram_list.mru_block = NULL;
1531 ram_list.version++;
1532 g_free(block);
1533 break;
1536 qemu_mutex_unlock_ramlist();
1539 void qemu_ram_free(ram_addr_t addr)
1541 RAMBlock *block;
1543 /* This assumes the iothread lock is taken here too. */
1544 qemu_mutex_lock_ramlist();
1545 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1546 if (addr == block->offset) {
1547 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1548 ram_list.mru_block = NULL;
1549 ram_list.version++;
1550 if (block->flags & RAM_PREALLOC) {
1552 } else if (xen_enabled()) {
1553 xen_invalidate_map_cache_entry(block->host);
1554 #ifndef _WIN32
1555 } else if (block->fd >= 0) {
1556 munmap(block->host, block->max_length);
1557 close(block->fd);
1558 #endif
1559 } else {
1560 qemu_anon_ram_free(block->host, block->max_length);
1562 g_free(block);
1563 break;
1566 qemu_mutex_unlock_ramlist();
1570 #ifndef _WIN32
1571 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1573 RAMBlock *block;
1574 ram_addr_t offset;
1575 int flags;
1576 void *area, *vaddr;
1578 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1579 offset = addr - block->offset;
1580 if (offset < block->max_length) {
1581 vaddr = ramblock_ptr(block, offset);
1582 if (block->flags & RAM_PREALLOC) {
1584 } else if (xen_enabled()) {
1585 abort();
1586 } else {
1587 flags = MAP_FIXED;
1588 munmap(vaddr, length);
1589 if (block->fd >= 0) {
1590 flags |= (block->flags & RAM_SHARED ?
1591 MAP_SHARED : MAP_PRIVATE);
1592 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1593 flags, block->fd, offset);
1594 } else {
1596 * Remap needs to match alloc. Accelerators that
1597 * set phys_mem_alloc never remap. If they did,
1598 * we'd need a remap hook here.
1600 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1602 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1603 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1604 flags, -1, 0);
1606 if (area != vaddr) {
1607 fprintf(stderr, "Could not remap addr: "
1608 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1609 length, addr);
1610 exit(1);
1612 memory_try_enable_merging(vaddr, length);
1613 qemu_ram_setup_dump(vaddr, length);
1615 return;
1619 #endif /* !_WIN32 */
1621 int qemu_get_ram_fd(ram_addr_t addr)
1623 RAMBlock *block = qemu_get_ram_block(addr);
1625 return block->fd;
1628 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1630 RAMBlock *block = qemu_get_ram_block(addr);
1632 return ramblock_ptr(block, 0);
1635 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1636 With the exception of the softmmu code in this file, this should
1637 only be used for local memory (e.g. video ram) that the device owns,
1638 and knows it isn't going to access beyond the end of the block.
1640 It should not be used for general purpose DMA.
1641 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1643 void *qemu_get_ram_ptr(ram_addr_t addr)
1645 RAMBlock *block = qemu_get_ram_block(addr);
1647 if (xen_enabled()) {
1648 /* We need to check if the requested address is in the RAM
1649 * because we don't want to map the entire memory in QEMU.
1650 * In that case just map until the end of the page.
1652 if (block->offset == 0) {
1653 return xen_map_cache(addr, 0, 0);
1654 } else if (block->host == NULL) {
1655 block->host =
1656 xen_map_cache(block->offset, block->max_length, 1);
1659 return ramblock_ptr(block, addr - block->offset);
1662 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1663 * but takes a size argument */
1664 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1666 if (*size == 0) {
1667 return NULL;
1669 if (xen_enabled()) {
1670 return xen_map_cache(addr, *size, 1);
1671 } else {
1672 RAMBlock *block;
1674 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1675 if (addr - block->offset < block->max_length) {
1676 if (addr - block->offset + *size > block->max_length)
1677 *size = block->max_length - addr + block->offset;
1678 return ramblock_ptr(block, addr - block->offset);
1682 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1683 abort();
1687 /* Some of the softmmu routines need to translate from a host pointer
1688 (typically a TLB entry) back to a ram offset. */
1689 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1691 RAMBlock *block;
1692 uint8_t *host = ptr;
1694 if (xen_enabled()) {
1695 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1696 return qemu_get_ram_block(*ram_addr)->mr;
1699 block = ram_list.mru_block;
1700 if (block && block->host && host - block->host < block->max_length) {
1701 goto found;
1704 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1705 /* This case append when the block is not mapped. */
1706 if (block->host == NULL) {
1707 continue;
1709 if (host - block->host < block->max_length) {
1710 goto found;
1714 return NULL;
1716 found:
1717 *ram_addr = block->offset + (host - block->host);
1718 return block->mr;
1721 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1722 uint64_t val, unsigned size)
1724 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1725 tb_invalidate_phys_page_fast(ram_addr, size);
1727 switch (size) {
1728 case 1:
1729 stb_p(qemu_get_ram_ptr(ram_addr), val);
1730 break;
1731 case 2:
1732 stw_p(qemu_get_ram_ptr(ram_addr), val);
1733 break;
1734 case 4:
1735 stl_p(qemu_get_ram_ptr(ram_addr), val);
1736 break;
1737 default:
1738 abort();
1740 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1741 /* we remove the notdirty callback only if the code has been
1742 flushed */
1743 if (!cpu_physical_memory_is_clean(ram_addr)) {
1744 CPUArchState *env = current_cpu->env_ptr;
1745 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1749 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1750 unsigned size, bool is_write)
1752 return is_write;
1755 static const MemoryRegionOps notdirty_mem_ops = {
1756 .write = notdirty_mem_write,
1757 .valid.accepts = notdirty_mem_accepts,
1758 .endianness = DEVICE_NATIVE_ENDIAN,
1761 /* Generate a debug exception if a watchpoint has been hit. */
1762 static void check_watchpoint(int offset, int len, int flags)
1764 CPUState *cpu = current_cpu;
1765 CPUArchState *env = cpu->env_ptr;
1766 target_ulong pc, cs_base;
1767 target_ulong vaddr;
1768 CPUWatchpoint *wp;
1769 int cpu_flags;
1771 if (cpu->watchpoint_hit) {
1772 /* We re-entered the check after replacing the TB. Now raise
1773 * the debug interrupt so that is will trigger after the
1774 * current instruction. */
1775 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1776 return;
1778 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1779 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1780 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1781 && (wp->flags & flags)) {
1782 if (flags == BP_MEM_READ) {
1783 wp->flags |= BP_WATCHPOINT_HIT_READ;
1784 } else {
1785 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1787 wp->hitaddr = vaddr;
1788 if (!cpu->watchpoint_hit) {
1789 cpu->watchpoint_hit = wp;
1790 tb_check_watchpoint(cpu);
1791 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1792 cpu->exception_index = EXCP_DEBUG;
1793 cpu_loop_exit(cpu);
1794 } else {
1795 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1796 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1797 cpu_resume_from_signal(cpu, NULL);
1800 } else {
1801 wp->flags &= ~BP_WATCHPOINT_HIT;
1806 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1807 so these check for a hit then pass through to the normal out-of-line
1808 phys routines. */
1809 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1810 unsigned size)
1812 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1813 switch (size) {
1814 case 1: return ldub_phys(&address_space_memory, addr);
1815 case 2: return lduw_phys(&address_space_memory, addr);
1816 case 4: return ldl_phys(&address_space_memory, addr);
1817 default: abort();
1821 static void watch_mem_write(void *opaque, hwaddr addr,
1822 uint64_t val, unsigned size)
1824 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1825 switch (size) {
1826 case 1:
1827 stb_phys(&address_space_memory, addr, val);
1828 break;
1829 case 2:
1830 stw_phys(&address_space_memory, addr, val);
1831 break;
1832 case 4:
1833 stl_phys(&address_space_memory, addr, val);
1834 break;
1835 default: abort();
1839 static const MemoryRegionOps watch_mem_ops = {
1840 .read = watch_mem_read,
1841 .write = watch_mem_write,
1842 .endianness = DEVICE_NATIVE_ENDIAN,
1845 static uint64_t subpage_read(void *opaque, hwaddr addr,
1846 unsigned len)
1848 subpage_t *subpage = opaque;
1849 uint8_t buf[8];
1851 #if defined(DEBUG_SUBPAGE)
1852 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1853 subpage, len, addr);
1854 #endif
1855 address_space_read(subpage->as, addr + subpage->base, buf, len);
1856 switch (len) {
1857 case 1:
1858 return ldub_p(buf);
1859 case 2:
1860 return lduw_p(buf);
1861 case 4:
1862 return ldl_p(buf);
1863 case 8:
1864 return ldq_p(buf);
1865 default:
1866 abort();
1870 static void subpage_write(void *opaque, hwaddr addr,
1871 uint64_t value, unsigned len)
1873 subpage_t *subpage = opaque;
1874 uint8_t buf[8];
1876 #if defined(DEBUG_SUBPAGE)
1877 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1878 " value %"PRIx64"\n",
1879 __func__, subpage, len, addr, value);
1880 #endif
1881 switch (len) {
1882 case 1:
1883 stb_p(buf, value);
1884 break;
1885 case 2:
1886 stw_p(buf, value);
1887 break;
1888 case 4:
1889 stl_p(buf, value);
1890 break;
1891 case 8:
1892 stq_p(buf, value);
1893 break;
1894 default:
1895 abort();
1897 address_space_write(subpage->as, addr + subpage->base, buf, len);
1900 static bool subpage_accepts(void *opaque, hwaddr addr,
1901 unsigned len, bool is_write)
1903 subpage_t *subpage = opaque;
1904 #if defined(DEBUG_SUBPAGE)
1905 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1906 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1907 #endif
1909 return address_space_access_valid(subpage->as, addr + subpage->base,
1910 len, is_write);
1913 static const MemoryRegionOps subpage_ops = {
1914 .read = subpage_read,
1915 .write = subpage_write,
1916 .impl.min_access_size = 1,
1917 .impl.max_access_size = 8,
1918 .valid.min_access_size = 1,
1919 .valid.max_access_size = 8,
1920 .valid.accepts = subpage_accepts,
1921 .endianness = DEVICE_NATIVE_ENDIAN,
1924 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1925 uint16_t section)
1927 int idx, eidx;
1929 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1930 return -1;
1931 idx = SUBPAGE_IDX(start);
1932 eidx = SUBPAGE_IDX(end);
1933 #if defined(DEBUG_SUBPAGE)
1934 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1935 __func__, mmio, start, end, idx, eidx, section);
1936 #endif
1937 for (; idx <= eidx; idx++) {
1938 mmio->sub_section[idx] = section;
1941 return 0;
1944 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1946 subpage_t *mmio;
1948 mmio = g_malloc0(sizeof(subpage_t));
1950 mmio->as = as;
1951 mmio->base = base;
1952 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1953 NULL, TARGET_PAGE_SIZE);
1954 mmio->iomem.subpage = true;
1955 #if defined(DEBUG_SUBPAGE)
1956 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1957 mmio, base, TARGET_PAGE_SIZE);
1958 #endif
1959 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1961 return mmio;
1964 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1965 MemoryRegion *mr)
1967 assert(as);
1968 MemoryRegionSection section = {
1969 .address_space = as,
1970 .mr = mr,
1971 .offset_within_address_space = 0,
1972 .offset_within_region = 0,
1973 .size = int128_2_64(),
1976 return phys_section_add(map, &section);
1979 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1981 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1984 static void io_mem_init(void)
1986 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1987 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1988 NULL, UINT64_MAX);
1989 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1990 NULL, UINT64_MAX);
1991 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1992 NULL, UINT64_MAX);
1995 static void mem_begin(MemoryListener *listener)
1997 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1998 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1999 uint16_t n;
2001 n = dummy_section(&d->map, as, &io_mem_unassigned);
2002 assert(n == PHYS_SECTION_UNASSIGNED);
2003 n = dummy_section(&d->map, as, &io_mem_notdirty);
2004 assert(n == PHYS_SECTION_NOTDIRTY);
2005 n = dummy_section(&d->map, as, &io_mem_rom);
2006 assert(n == PHYS_SECTION_ROM);
2007 n = dummy_section(&d->map, as, &io_mem_watch);
2008 assert(n == PHYS_SECTION_WATCH);
2010 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2011 d->as = as;
2012 as->next_dispatch = d;
2015 static void mem_commit(MemoryListener *listener)
2017 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2018 AddressSpaceDispatch *cur = as->dispatch;
2019 AddressSpaceDispatch *next = as->next_dispatch;
2021 phys_page_compact_all(next, next->map.nodes_nb);
2023 as->dispatch = next;
2025 if (cur) {
2026 phys_sections_free(&cur->map);
2027 g_free(cur);
2031 static void tcg_commit(MemoryListener *listener)
2033 CPUState *cpu;
2035 /* since each CPU stores ram addresses in its TLB cache, we must
2036 reset the modified entries */
2037 /* XXX: slow ! */
2038 CPU_FOREACH(cpu) {
2039 /* FIXME: Disentangle the cpu.h circular files deps so we can
2040 directly get the right CPU from listener. */
2041 if (cpu->tcg_as_listener != listener) {
2042 continue;
2044 tlb_flush(cpu, 1);
2048 static void core_log_global_start(MemoryListener *listener)
2050 cpu_physical_memory_set_dirty_tracking(true);
2053 static void core_log_global_stop(MemoryListener *listener)
2055 cpu_physical_memory_set_dirty_tracking(false);
2058 static MemoryListener core_memory_listener = {
2059 .log_global_start = core_log_global_start,
2060 .log_global_stop = core_log_global_stop,
2061 .priority = 1,
2064 void address_space_init_dispatch(AddressSpace *as)
2066 as->dispatch = NULL;
2067 as->dispatch_listener = (MemoryListener) {
2068 .begin = mem_begin,
2069 .commit = mem_commit,
2070 .region_add = mem_add,
2071 .region_nop = mem_add,
2072 .priority = 0,
2074 memory_listener_register(&as->dispatch_listener, as);
2077 void address_space_destroy_dispatch(AddressSpace *as)
2079 AddressSpaceDispatch *d = as->dispatch;
2081 memory_listener_unregister(&as->dispatch_listener);
2082 g_free(d);
2083 as->dispatch = NULL;
2086 static void memory_map_init(void)
2088 system_memory = g_malloc(sizeof(*system_memory));
2090 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2091 address_space_init(&address_space_memory, system_memory, "memory");
2093 system_io = g_malloc(sizeof(*system_io));
2094 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2095 65536);
2096 address_space_init(&address_space_io, system_io, "I/O");
2098 memory_listener_register(&core_memory_listener, &address_space_memory);
2101 MemoryRegion *get_system_memory(void)
2103 return system_memory;
2106 MemoryRegion *get_system_io(void)
2108 return system_io;
2111 #endif /* !defined(CONFIG_USER_ONLY) */
2113 /* physical memory access (slow version, mainly for debug) */
2114 #if defined(CONFIG_USER_ONLY)
2115 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2116 uint8_t *buf, int len, int is_write)
2118 int l, flags;
2119 target_ulong page;
2120 void * p;
2122 while (len > 0) {
2123 page = addr & TARGET_PAGE_MASK;
2124 l = (page + TARGET_PAGE_SIZE) - addr;
2125 if (l > len)
2126 l = len;
2127 flags = page_get_flags(page);
2128 if (!(flags & PAGE_VALID))
2129 return -1;
2130 if (is_write) {
2131 if (!(flags & PAGE_WRITE))
2132 return -1;
2133 /* XXX: this code should not depend on lock_user */
2134 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2135 return -1;
2136 memcpy(p, buf, l);
2137 unlock_user(p, addr, l);
2138 } else {
2139 if (!(flags & PAGE_READ))
2140 return -1;
2141 /* XXX: this code should not depend on lock_user */
2142 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2143 return -1;
2144 memcpy(buf, p, l);
2145 unlock_user(p, addr, 0);
2147 len -= l;
2148 buf += l;
2149 addr += l;
2151 return 0;
2154 #else
2156 static void invalidate_and_set_dirty(hwaddr addr,
2157 hwaddr length)
2159 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2160 tb_invalidate_phys_range(addr, addr + length, 0);
2161 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2163 xen_modified_memory(addr, length);
2166 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2168 unsigned access_size_max = mr->ops->valid.max_access_size;
2170 /* Regions are assumed to support 1-4 byte accesses unless
2171 otherwise specified. */
2172 if (access_size_max == 0) {
2173 access_size_max = 4;
2176 /* Bound the maximum access by the alignment of the address. */
2177 if (!mr->ops->impl.unaligned) {
2178 unsigned align_size_max = addr & -addr;
2179 if (align_size_max != 0 && align_size_max < access_size_max) {
2180 access_size_max = align_size_max;
2184 /* Don't attempt accesses larger than the maximum. */
2185 if (l > access_size_max) {
2186 l = access_size_max;
2188 if (l & (l - 1)) {
2189 l = 1 << (qemu_fls(l) - 1);
2192 return l;
2195 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2196 int len, bool is_write)
2198 hwaddr l;
2199 uint8_t *ptr;
2200 uint64_t val;
2201 hwaddr addr1;
2202 MemoryRegion *mr;
2203 bool error = false;
2205 while (len > 0) {
2206 l = len;
2207 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2209 if (is_write) {
2210 if (!memory_access_is_direct(mr, is_write)) {
2211 l = memory_access_size(mr, l, addr1);
2212 /* XXX: could force current_cpu to NULL to avoid
2213 potential bugs */
2214 switch (l) {
2215 case 8:
2216 /* 64 bit write access */
2217 val = ldq_p(buf);
2218 error |= io_mem_write(mr, addr1, val, 8);
2219 break;
2220 case 4:
2221 /* 32 bit write access */
2222 val = ldl_p(buf);
2223 error |= io_mem_write(mr, addr1, val, 4);
2224 break;
2225 case 2:
2226 /* 16 bit write access */
2227 val = lduw_p(buf);
2228 error |= io_mem_write(mr, addr1, val, 2);
2229 break;
2230 case 1:
2231 /* 8 bit write access */
2232 val = ldub_p(buf);
2233 error |= io_mem_write(mr, addr1, val, 1);
2234 break;
2235 default:
2236 abort();
2238 } else {
2239 addr1 += memory_region_get_ram_addr(mr);
2240 /* RAM case */
2241 ptr = qemu_get_ram_ptr(addr1);
2242 memcpy(ptr, buf, l);
2243 invalidate_and_set_dirty(addr1, l);
2245 } else {
2246 if (!memory_access_is_direct(mr, is_write)) {
2247 /* I/O case */
2248 l = memory_access_size(mr, l, addr1);
2249 switch (l) {
2250 case 8:
2251 /* 64 bit read access */
2252 error |= io_mem_read(mr, addr1, &val, 8);
2253 stq_p(buf, val);
2254 break;
2255 case 4:
2256 /* 32 bit read access */
2257 error |= io_mem_read(mr, addr1, &val, 4);
2258 stl_p(buf, val);
2259 break;
2260 case 2:
2261 /* 16 bit read access */
2262 error |= io_mem_read(mr, addr1, &val, 2);
2263 stw_p(buf, val);
2264 break;
2265 case 1:
2266 /* 8 bit read access */
2267 error |= io_mem_read(mr, addr1, &val, 1);
2268 stb_p(buf, val);
2269 break;
2270 default:
2271 abort();
2273 } else {
2274 /* RAM case */
2275 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2276 memcpy(buf, ptr, l);
2279 len -= l;
2280 buf += l;
2281 addr += l;
2284 return error;
2287 bool address_space_write(AddressSpace *as, hwaddr addr,
2288 const uint8_t *buf, int len)
2290 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2293 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2295 return address_space_rw(as, addr, buf, len, false);
2299 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2300 int len, int is_write)
2302 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2310 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2311 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2313 hwaddr l;
2314 uint8_t *ptr;
2315 hwaddr addr1;
2316 MemoryRegion *mr;
2318 while (len > 0) {
2319 l = len;
2320 mr = address_space_translate(as, addr, &addr1, &l, true);
2322 if (!(memory_region_is_ram(mr) ||
2323 memory_region_is_romd(mr))) {
2324 /* do nothing */
2325 } else {
2326 addr1 += memory_region_get_ram_addr(mr);
2327 /* ROM/RAM case */
2328 ptr = qemu_get_ram_ptr(addr1);
2329 switch (type) {
2330 case WRITE_DATA:
2331 memcpy(ptr, buf, l);
2332 invalidate_and_set_dirty(addr1, l);
2333 break;
2334 case FLUSH_CACHE:
2335 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2336 break;
2339 len -= l;
2340 buf += l;
2341 addr += l;
2345 /* used for ROM loading : can write in RAM and ROM */
2346 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2347 const uint8_t *buf, int len)
2349 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2352 void cpu_flush_icache_range(hwaddr start, int len)
2355 * This function should do the same thing as an icache flush that was
2356 * triggered from within the guest. For TCG we are always cache coherent,
2357 * so there is no need to flush anything. For KVM / Xen we need to flush
2358 * the host's instruction cache at least.
2360 if (tcg_enabled()) {
2361 return;
2364 cpu_physical_memory_write_rom_internal(&address_space_memory,
2365 start, NULL, len, FLUSH_CACHE);
2368 typedef struct {
2369 MemoryRegion *mr;
2370 void *buffer;
2371 hwaddr addr;
2372 hwaddr len;
2373 } BounceBuffer;
2375 static BounceBuffer bounce;
2377 typedef struct MapClient {
2378 void *opaque;
2379 void (*callback)(void *opaque);
2380 QLIST_ENTRY(MapClient) link;
2381 } MapClient;
2383 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2384 = QLIST_HEAD_INITIALIZER(map_client_list);
2386 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2388 MapClient *client = g_malloc(sizeof(*client));
2390 client->opaque = opaque;
2391 client->callback = callback;
2392 QLIST_INSERT_HEAD(&map_client_list, client, link);
2393 return client;
2396 static void cpu_unregister_map_client(void *_client)
2398 MapClient *client = (MapClient *)_client;
2400 QLIST_REMOVE(client, link);
2401 g_free(client);
2404 static void cpu_notify_map_clients(void)
2406 MapClient *client;
2408 while (!QLIST_EMPTY(&map_client_list)) {
2409 client = QLIST_FIRST(&map_client_list);
2410 client->callback(client->opaque);
2411 cpu_unregister_map_client(client);
2415 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2417 MemoryRegion *mr;
2418 hwaddr l, xlat;
2420 while (len > 0) {
2421 l = len;
2422 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2423 if (!memory_access_is_direct(mr, is_write)) {
2424 l = memory_access_size(mr, l, addr);
2425 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2426 return false;
2430 len -= l;
2431 addr += l;
2433 return true;
2436 /* Map a physical memory region into a host virtual address.
2437 * May map a subset of the requested range, given by and returned in *plen.
2438 * May return NULL if resources needed to perform the mapping are exhausted.
2439 * Use only for reads OR writes - not for read-modify-write operations.
2440 * Use cpu_register_map_client() to know when retrying the map operation is
2441 * likely to succeed.
2443 void *address_space_map(AddressSpace *as,
2444 hwaddr addr,
2445 hwaddr *plen,
2446 bool is_write)
2448 hwaddr len = *plen;
2449 hwaddr done = 0;
2450 hwaddr l, xlat, base;
2451 MemoryRegion *mr, *this_mr;
2452 ram_addr_t raddr;
2454 if (len == 0) {
2455 return NULL;
2458 l = len;
2459 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2460 if (!memory_access_is_direct(mr, is_write)) {
2461 if (bounce.buffer) {
2462 return NULL;
2464 /* Avoid unbounded allocations */
2465 l = MIN(l, TARGET_PAGE_SIZE);
2466 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2467 bounce.addr = addr;
2468 bounce.len = l;
2470 memory_region_ref(mr);
2471 bounce.mr = mr;
2472 if (!is_write) {
2473 address_space_read(as, addr, bounce.buffer, l);
2476 *plen = l;
2477 return bounce.buffer;
2480 base = xlat;
2481 raddr = memory_region_get_ram_addr(mr);
2483 for (;;) {
2484 len -= l;
2485 addr += l;
2486 done += l;
2487 if (len == 0) {
2488 break;
2491 l = len;
2492 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2493 if (this_mr != mr || xlat != base + done) {
2494 break;
2498 memory_region_ref(mr);
2499 *plen = done;
2500 return qemu_ram_ptr_length(raddr + base, plen);
2503 /* Unmaps a memory region previously mapped by address_space_map().
2504 * Will also mark the memory as dirty if is_write == 1. access_len gives
2505 * the amount of memory that was actually read or written by the caller.
2507 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2508 int is_write, hwaddr access_len)
2510 if (buffer != bounce.buffer) {
2511 MemoryRegion *mr;
2512 ram_addr_t addr1;
2514 mr = qemu_ram_addr_from_host(buffer, &addr1);
2515 assert(mr != NULL);
2516 if (is_write) {
2517 invalidate_and_set_dirty(addr1, access_len);
2519 if (xen_enabled()) {
2520 xen_invalidate_map_cache_entry(buffer);
2522 memory_region_unref(mr);
2523 return;
2525 if (is_write) {
2526 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2528 qemu_vfree(bounce.buffer);
2529 bounce.buffer = NULL;
2530 memory_region_unref(bounce.mr);
2531 cpu_notify_map_clients();
2534 void *cpu_physical_memory_map(hwaddr addr,
2535 hwaddr *plen,
2536 int is_write)
2538 return address_space_map(&address_space_memory, addr, plen, is_write);
2541 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2542 int is_write, hwaddr access_len)
2544 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2547 /* warning: addr must be aligned */
2548 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2549 enum device_endian endian)
2551 uint8_t *ptr;
2552 uint64_t val;
2553 MemoryRegion *mr;
2554 hwaddr l = 4;
2555 hwaddr addr1;
2557 mr = address_space_translate(as, addr, &addr1, &l, false);
2558 if (l < 4 || !memory_access_is_direct(mr, false)) {
2559 /* I/O case */
2560 io_mem_read(mr, addr1, &val, 4);
2561 #if defined(TARGET_WORDS_BIGENDIAN)
2562 if (endian == DEVICE_LITTLE_ENDIAN) {
2563 val = bswap32(val);
2565 #else
2566 if (endian == DEVICE_BIG_ENDIAN) {
2567 val = bswap32(val);
2569 #endif
2570 } else {
2571 /* RAM case */
2572 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2573 & TARGET_PAGE_MASK)
2574 + addr1);
2575 switch (endian) {
2576 case DEVICE_LITTLE_ENDIAN:
2577 val = ldl_le_p(ptr);
2578 break;
2579 case DEVICE_BIG_ENDIAN:
2580 val = ldl_be_p(ptr);
2581 break;
2582 default:
2583 val = ldl_p(ptr);
2584 break;
2587 return val;
2590 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2592 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2595 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2597 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2600 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2602 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2605 /* warning: addr must be aligned */
2606 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2607 enum device_endian endian)
2609 uint8_t *ptr;
2610 uint64_t val;
2611 MemoryRegion *mr;
2612 hwaddr l = 8;
2613 hwaddr addr1;
2615 mr = address_space_translate(as, addr, &addr1, &l,
2616 false);
2617 if (l < 8 || !memory_access_is_direct(mr, false)) {
2618 /* I/O case */
2619 io_mem_read(mr, addr1, &val, 8);
2620 #if defined(TARGET_WORDS_BIGENDIAN)
2621 if (endian == DEVICE_LITTLE_ENDIAN) {
2622 val = bswap64(val);
2624 #else
2625 if (endian == DEVICE_BIG_ENDIAN) {
2626 val = bswap64(val);
2628 #endif
2629 } else {
2630 /* RAM case */
2631 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2632 & TARGET_PAGE_MASK)
2633 + addr1);
2634 switch (endian) {
2635 case DEVICE_LITTLE_ENDIAN:
2636 val = ldq_le_p(ptr);
2637 break;
2638 case DEVICE_BIG_ENDIAN:
2639 val = ldq_be_p(ptr);
2640 break;
2641 default:
2642 val = ldq_p(ptr);
2643 break;
2646 return val;
2649 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2651 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2654 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2656 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2659 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2661 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2664 /* XXX: optimize */
2665 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2667 uint8_t val;
2668 address_space_rw(as, addr, &val, 1, 0);
2669 return val;
2672 /* warning: addr must be aligned */
2673 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2674 enum device_endian endian)
2676 uint8_t *ptr;
2677 uint64_t val;
2678 MemoryRegion *mr;
2679 hwaddr l = 2;
2680 hwaddr addr1;
2682 mr = address_space_translate(as, addr, &addr1, &l,
2683 false);
2684 if (l < 2 || !memory_access_is_direct(mr, false)) {
2685 /* I/O case */
2686 io_mem_read(mr, addr1, &val, 2);
2687 #if defined(TARGET_WORDS_BIGENDIAN)
2688 if (endian == DEVICE_LITTLE_ENDIAN) {
2689 val = bswap16(val);
2691 #else
2692 if (endian == DEVICE_BIG_ENDIAN) {
2693 val = bswap16(val);
2695 #endif
2696 } else {
2697 /* RAM case */
2698 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2699 & TARGET_PAGE_MASK)
2700 + addr1);
2701 switch (endian) {
2702 case DEVICE_LITTLE_ENDIAN:
2703 val = lduw_le_p(ptr);
2704 break;
2705 case DEVICE_BIG_ENDIAN:
2706 val = lduw_be_p(ptr);
2707 break;
2708 default:
2709 val = lduw_p(ptr);
2710 break;
2713 return val;
2716 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2718 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2721 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2723 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2726 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2728 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2731 /* warning: addr must be aligned. The ram page is not masked as dirty
2732 and the code inside is not invalidated. It is useful if the dirty
2733 bits are used to track modified PTEs */
2734 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2736 uint8_t *ptr;
2737 MemoryRegion *mr;
2738 hwaddr l = 4;
2739 hwaddr addr1;
2741 mr = address_space_translate(as, addr, &addr1, &l,
2742 true);
2743 if (l < 4 || !memory_access_is_direct(mr, true)) {
2744 io_mem_write(mr, addr1, val, 4);
2745 } else {
2746 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2747 ptr = qemu_get_ram_ptr(addr1);
2748 stl_p(ptr, val);
2750 if (unlikely(in_migration)) {
2751 if (cpu_physical_memory_is_clean(addr1)) {
2752 /* invalidate code */
2753 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2754 /* set dirty bit */
2755 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2761 /* warning: addr must be aligned */
2762 static inline void stl_phys_internal(AddressSpace *as,
2763 hwaddr addr, uint32_t val,
2764 enum device_endian endian)
2766 uint8_t *ptr;
2767 MemoryRegion *mr;
2768 hwaddr l = 4;
2769 hwaddr addr1;
2771 mr = address_space_translate(as, addr, &addr1, &l,
2772 true);
2773 if (l < 4 || !memory_access_is_direct(mr, true)) {
2774 #if defined(TARGET_WORDS_BIGENDIAN)
2775 if (endian == DEVICE_LITTLE_ENDIAN) {
2776 val = bswap32(val);
2778 #else
2779 if (endian == DEVICE_BIG_ENDIAN) {
2780 val = bswap32(val);
2782 #endif
2783 io_mem_write(mr, addr1, val, 4);
2784 } else {
2785 /* RAM case */
2786 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2787 ptr = qemu_get_ram_ptr(addr1);
2788 switch (endian) {
2789 case DEVICE_LITTLE_ENDIAN:
2790 stl_le_p(ptr, val);
2791 break;
2792 case DEVICE_BIG_ENDIAN:
2793 stl_be_p(ptr, val);
2794 break;
2795 default:
2796 stl_p(ptr, val);
2797 break;
2799 invalidate_and_set_dirty(addr1, 4);
2803 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2805 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2808 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2810 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2813 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2815 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2818 /* XXX: optimize */
2819 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2821 uint8_t v = val;
2822 address_space_rw(as, addr, &v, 1, 1);
2825 /* warning: addr must be aligned */
2826 static inline void stw_phys_internal(AddressSpace *as,
2827 hwaddr addr, uint32_t val,
2828 enum device_endian endian)
2830 uint8_t *ptr;
2831 MemoryRegion *mr;
2832 hwaddr l = 2;
2833 hwaddr addr1;
2835 mr = address_space_translate(as, addr, &addr1, &l, true);
2836 if (l < 2 || !memory_access_is_direct(mr, true)) {
2837 #if defined(TARGET_WORDS_BIGENDIAN)
2838 if (endian == DEVICE_LITTLE_ENDIAN) {
2839 val = bswap16(val);
2841 #else
2842 if (endian == DEVICE_BIG_ENDIAN) {
2843 val = bswap16(val);
2845 #endif
2846 io_mem_write(mr, addr1, val, 2);
2847 } else {
2848 /* RAM case */
2849 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2850 ptr = qemu_get_ram_ptr(addr1);
2851 switch (endian) {
2852 case DEVICE_LITTLE_ENDIAN:
2853 stw_le_p(ptr, val);
2854 break;
2855 case DEVICE_BIG_ENDIAN:
2856 stw_be_p(ptr, val);
2857 break;
2858 default:
2859 stw_p(ptr, val);
2860 break;
2862 invalidate_and_set_dirty(addr1, 2);
2866 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2868 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2871 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2873 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2876 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2878 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2881 /* XXX: optimize */
2882 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2884 val = tswap64(val);
2885 address_space_rw(as, addr, (void *) &val, 8, 1);
2888 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2890 val = cpu_to_le64(val);
2891 address_space_rw(as, addr, (void *) &val, 8, 1);
2894 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2896 val = cpu_to_be64(val);
2897 address_space_rw(as, addr, (void *) &val, 8, 1);
2900 /* virtual memory access for debug (includes writing to ROM) */
2901 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2902 uint8_t *buf, int len, int is_write)
2904 int l;
2905 hwaddr phys_addr;
2906 target_ulong page;
2908 while (len > 0) {
2909 page = addr & TARGET_PAGE_MASK;
2910 phys_addr = cpu_get_phys_page_debug(cpu, page);
2911 /* if no physical page mapped, return an error */
2912 if (phys_addr == -1)
2913 return -1;
2914 l = (page + TARGET_PAGE_SIZE) - addr;
2915 if (l > len)
2916 l = len;
2917 phys_addr += (addr & ~TARGET_PAGE_MASK);
2918 if (is_write) {
2919 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2920 } else {
2921 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2923 len -= l;
2924 buf += l;
2925 addr += l;
2927 return 0;
2929 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined, false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
    bool big_endian = false;

#ifdef TARGET_WORDS_BIGENDIAN
    big_endian = true;
#endif
    return big_endian;
}
2945 #ifndef CONFIG_USER_ONLY
2946 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2948 MemoryRegion*mr;
2949 hwaddr l = 1;
2951 mr = address_space_translate(&address_space_memory,
2952 phys_addr, &phys_addr, &l, false);
2954 return !(memory_region_is_ram(mr) ||
2955 memory_region_is_romd(mr));
2958 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2960 RAMBlock *block;
2962 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2963 func(block->host, block->offset, block->used_length, opaque);
2966 #endif