vfio-pci: Fix missing unparent of dynamically allocated MemoryRegion
[qemu/kevin.git] / exec.c
blob6b79ad1d111ae6206c6145bcaa1d3eba582d0768
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
/* Flag written by cpu_physical_memory_set_dirty_tracking(). */
59 static bool in_migration;
/* Global list of RAM blocks; walked with QTAILQ_FOREACH throughout this file. */
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
/* NOTE(review): presumably the root regions set up by memory_map_init() - confirm. */
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
/* Default address space assigned to every CPU in cpu_exec_init(). */
67 AddressSpace address_space_memory;
/* Special regions recognised by memory_region_is_unassigned(). */
69 MemoryRegion io_mem_rom, io_mem_notdirty;
/* Region returned for untranslatable or unmapped accesses. */
70 static MemoryRegion io_mem_unassigned;
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
78 /* Only a portion of RAM (used_length) is actually used, and migrated.
79 * This used_length size can change across reboots.
81 #define RAM_RESIZEABLE (1 << 2)
83 #endif
/* Global queue of CPUs; cpu_exec_init() appends to it, CPU_FOREACH walks it. */
85 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
86 /* current CPU in the current thread. It is only valid inside
87 cpu_exec() */
88 DEFINE_TLS(CPUState *, current_cpu);
89 /* 0 = Do not count executed instructions.
90 1 = Precise instruction counting.
91 2 = Adaptive rate instruction counting. */
92 int use_icount;
94 #if !defined(CONFIG_USER_ONLY)
/*
 * One entry of the multi-level physical page map, packed into 32 bits
 * (6-bit skip count + 26-bit index).
 */
96 typedef struct PhysPageEntry PhysPageEntry;
98 struct PhysPageEntry {
99 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
100 uint32_t skip : 6;
101 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
102 uint32_t ptr : 26;
105 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
107 /* Size of the L2 (and L3, etc) page tables. */
108 #define ADDR_SPACE_BITS 64
110 #define P_L2_BITS 9
111 #define P_L2_SIZE (1 << P_L2_BITS)
113 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
115 typedef PhysPageEntry Node[P_L2_SIZE];
/*
 * Backing storage for a physical page map: a growable array of nodes
 * and a growable array of sections, each with used/allocated counters.
 */
117 typedef struct PhysPageMap {
/* Number of entries of 'sections' in use / allocated (see phys_section_add). */
118 unsigned sections_nb;
119 unsigned sections_nb_alloc;
/* Number of entries of 'nodes' in use / allocated (see phys_map_node_reserve). */
120 unsigned nodes_nb;
121 unsigned nodes_nb_alloc;
122 Node *nodes;
123 MemoryRegionSection *sections;
124 } PhysPageMap;
126 struct AddressSpaceDispatch {
127 /* This is a multi-level map on the physical address space.
128 * The bottom level has pointers to MemoryRegionSections.
130 PhysPageEntry phys_map;
131 PhysPageMap map;
132 AddressSpace *as;
135 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/*
 * A page that is split between several sections.  'iomem' is the region
 * placed in the page map; container_of() on it recovers the subpage_t.
 */
136 typedef struct subpage_t {
137 MemoryRegion iomem;
138 AddressSpace *as;
139 hwaddr base;
/* Section number for each byte offset within the page (indexed by SUBPAGE_IDX). */
140 uint16_t sub_section[TARGET_PAGE_SIZE];
141 } subpage_t;
143 #define PHYS_SECTION_UNASSIGNED 0
144 #define PHYS_SECTION_NOTDIRTY 1
145 #define PHYS_SECTION_ROM 2
146 #define PHYS_SECTION_WATCH 3
148 static void io_mem_init(void);
149 static void memory_map_init(void);
150 static void tcg_commit(MemoryListener *listener);
152 static MemoryRegion io_mem_watch;
153 #endif
155 #if !defined(CONFIG_USER_ONLY)
157 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
159 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
160 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
161 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
162 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
166 static uint32_t phys_map_node_alloc(PhysPageMap *map)
168 unsigned i;
169 uint32_t ret;
171 ret = map->nodes_nb++;
172 assert(ret != PHYS_MAP_NODE_NIL);
173 assert(ret != map->nodes_nb_alloc);
174 for (i = 0; i < P_L2_SIZE; ++i) {
175 map->nodes[ret][i].skip = 1;
176 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
178 return ret;
/*
 * Recursive worker for phys_page_set(): point pages starting at *index
 * (for *nb pages) at section number @leaf, beginning at tree level @level.
 * *index and *nb are advanced / decremented as pages are consumed, so the
 * caller's loop continues where the recursion stopped.
 */
181 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
182 hwaddr *index, hwaddr *nb, uint16_t leaf,
183 int level)
185 PhysPageEntry *p;
186 int i;
/* Number of pages covered by a single entry at this level. */
187 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
/* Non-leaf entry with no node behind it yet: allocate one. */
189 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
190 lp->ptr = phys_map_node_alloc(map);
191 p = map->nodes[lp->ptr];
/* At the bottom level every entry starts as a leaf for the unassigned section. */
192 if (level == 0) {
193 for (i = 0; i < P_L2_SIZE; i++) {
194 p[i].skip = 0;
195 p[i].ptr = PHYS_SECTION_UNASSIGNED;
198 } else {
199 p = map->nodes[lp->ptr];
/* First entry of this node that *index falls into. */
201 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
/* Walk entries until the range is exhausted or the node ends. */
203 while (*nb && lp < &p[P_L2_SIZE]) {
/* Range covers this whole, aligned entry: make it a leaf directly. */
204 if ((*index & (step - 1)) == 0 && *nb >= step) {
205 lp->skip = 0;
206 lp->ptr = leaf;
207 *index += step;
208 *nb -= step;
209 } else {
/* Partial coverage: descend one level. */
210 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
212 ++lp;
216 static void phys_page_set(AddressSpaceDispatch *d,
217 hwaddr index, hwaddr nb,
218 uint16_t leaf)
220 /* Wildly overreserve - it doesn't matter much. */
221 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
223 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
226 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
227 * and update our entry so we can skip it and go directly to the destination.
229 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
231 unsigned valid_ptr = P_L2_SIZE;
232 int valid = 0;
233 PhysPageEntry *p;
234 int i;
236 if (lp->ptr == PHYS_MAP_NODE_NIL) {
237 return;
240 p = nodes[lp->ptr];
241 for (i = 0; i < P_L2_SIZE; i++) {
242 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
243 continue;
246 valid_ptr = i;
247 valid++;
248 if (p[i].skip) {
249 phys_page_compact(&p[i], nodes, compacted);
253 /* We can only compress if there's only one child. */
254 if (valid != 1) {
255 return;
258 assert(valid_ptr < P_L2_SIZE);
260 /* Don't compress if it won't fit in the # of bits we have. */
261 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
262 return;
265 lp->ptr = p[valid_ptr].ptr;
266 if (!p[valid_ptr].skip) {
267 /* If our only child is a leaf, make this a leaf. */
268 /* By design, we should have made this node a leaf to begin with so we
269 * should never reach here.
270 * But since it's so simple to handle this, let's do it just in case we
271 * change this rule.
273 lp->skip = 0;
274 } else {
275 lp->skip += p[valid_ptr].skip;
/*
 * Compact the whole dispatch tree of @d, starting from its root entry.
 * Nothing is done when the root is already a leaf (skip == 0).
 */
279 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
/* NOTE(review): the bitmap is declared but never initialised here, and
 * phys_page_compact() only passes it along recursively - confirm it is
 * still needed. */
281 DECLARE_BITMAP(compacted, nodes_nb);
283 if (d->phys_map.skip) {
284 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
288 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
289 Node *nodes, MemoryRegionSection *sections)
291 PhysPageEntry *p;
292 hwaddr index = addr >> TARGET_PAGE_BITS;
293 int i;
295 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
296 if (lp.ptr == PHYS_MAP_NODE_NIL) {
297 return &sections[PHYS_SECTION_UNASSIGNED];
299 p = nodes[lp.ptr];
300 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
303 if (sections[lp.ptr].size.hi ||
304 range_covers_byte(sections[lp.ptr].offset_within_address_space,
305 sections[lp.ptr].size.lo, addr)) {
306 return &sections[lp.ptr];
307 } else {
308 return &sections[PHYS_SECTION_UNASSIGNED];
312 bool memory_region_is_unassigned(MemoryRegion *mr)
314 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
315 && mr != &io_mem_watch;
318 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
319 hwaddr addr,
320 bool resolve_subpage)
322 MemoryRegionSection *section;
323 subpage_t *subpage;
325 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
326 if (resolve_subpage && section->mr->subpage) {
327 subpage = container_of(section->mr, subpage_t, iomem);
328 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
330 return section;
333 static MemoryRegionSection *
334 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
335 hwaddr *plen, bool resolve_subpage)
337 MemoryRegionSection *section;
338 Int128 diff;
340 section = address_space_lookup_region(d, addr, resolve_subpage);
341 /* Compute offset within MemoryRegionSection */
342 addr -= section->offset_within_address_space;
344 /* Compute offset within MemoryRegion */
345 *xlat = addr + section->offset_within_region;
347 diff = int128_sub(section->mr->size, int128_make64(addr));
348 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
349 return section;
352 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
354 if (memory_region_is_ram(mr)) {
355 return !(is_write && mr->readonly);
357 if (memory_region_is_romd(mr)) {
358 return !is_write;
361 return false;
/*
 * Translate @addr in @as, following IOMMU regions until a region without
 * iommu_ops is reached.
 *
 * @xlat:     receives the offset within the returned region
 * @plen:     in: requested length; out: length usable at that offset
 * @is_write: selects which IOMMU permission bit is checked
 *
 * Returns the target region, or io_mem_unassigned when an IOMMU denies
 * the access.
 */
364 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
365 hwaddr *xlat, hwaddr *plen,
366 bool is_write)
368 IOMMUTLBEntry iotlb;
369 MemoryRegionSection *section;
370 MemoryRegion *mr;
371 hwaddr len = *plen;
373 for (;;) {
/* addr is both input and output here: it becomes the in-region offset. */
374 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
375 mr = section->mr;
377 if (!mr->iommu_ops) {
378 break;
381 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
/* Keep the low bits of addr, take the rest from the translation. */
382 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
383 | (addr & iotlb.addr_mask));
/* Do not let the access run past the end of the translated range. */
384 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
/* perm bit 0 is checked for reads, bit 1 for writes. */
385 if (!(iotlb.perm & (1 << is_write))) {
386 mr = &io_mem_unassigned;
387 break;
390 as = iotlb.target_as;
/* Under Xen, direct accesses are limited to the end of the current page. */
393 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
394 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
395 len = MIN(page, len);
/* NOTE(review): address_space_translate_internal() clamps *plen above,
 * but that value is overwritten here by 'len', which was never updated
 * from it - confirm whether the region-size clamp is meant to be kept. */
398 *plen = len;
399 *xlat = addr;
400 return mr;
403 MemoryRegionSection *
404 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
405 hwaddr *plen)
407 MemoryRegionSection *section;
408 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
410 assert(!section->mr->iommu_ops);
411 return section;
413 #endif
/*
 * One-time initialisation for this file.  In system-emulation builds it
 * initialises the RAM-list mutex and then calls memory_map_init() and
 * io_mem_init(); in user-only builds it does nothing.
 */
415 void cpu_exec_init_all(void)
417 #if !defined(CONFIG_USER_ONLY)
418 qemu_mutex_init(&ram_list.mutex);
419 memory_map_init();
420 io_mem_init();
421 #endif
424 #if !defined(CONFIG_USER_ONLY)
426 static int cpu_common_post_load(void *opaque, int version_id)
428 CPUState *cpu = opaque;
430 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
431 version_id is increased. */
432 cpu->interrupt_request &= ~0x01;
433 tlb_flush(cpu, 1);
435 return 0;
438 static int cpu_common_pre_load(void *opaque)
440 CPUState *cpu = opaque;
442 cpu->exception_index = -1;
444 return 0;
447 static bool cpu_common_exception_index_needed(void *opaque)
449 CPUState *cpu = opaque;
451 return tcg_enabled() && cpu->exception_index != -1;
/*
 * Optional subsection of vmstate_cpu_common carrying exception_index;
 * emitted only when cpu_common_exception_index_needed() says so.
 */
454 static const VMStateDescription vmstate_cpu_common_exception_index = {
455 .name = "cpu_common/exception_index",
456 .version_id = 1,
457 .minimum_version_id = 1,
458 .fields = (VMStateField[]) {
459 VMSTATE_INT32(exception_index, CPUState),
460 VMSTATE_END_OF_LIST()
/*
 * Migration description of the target-independent CPU state: the
 * 'halted' and 'interrupt_request' fields plus the optional
 * exception_index subsection.  Registered by cpu_exec_init() for CPUs
 * whose device has no vmsd of its own.
 */
464 const VMStateDescription vmstate_cpu_common = {
465 .name = "cpu_common",
466 .version_id = 1,
467 .minimum_version_id = 1,
468 .pre_load = cpu_common_pre_load,
469 .post_load = cpu_common_post_load,
470 .fields = (VMStateField[]) {
471 VMSTATE_UINT32(halted, CPUState),
472 VMSTATE_UINT32(interrupt_request, CPUState),
473 VMSTATE_END_OF_LIST()
475 .subsections = (VMStateSubsection[]) {
477 .vmsd = &vmstate_cpu_common_exception_index,
478 .needed = cpu_common_exception_index_needed,
479 } , {
/* Empty entry terminating the subsection list. */
480 /* empty */
485 #endif
487 CPUState *qemu_get_cpu(int index)
489 CPUState *cpu;
491 CPU_FOREACH(cpu) {
492 if (cpu->cpu_index == index) {
493 return cpu;
497 return NULL;
500 #if !defined(CONFIG_USER_ONLY)
501 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
503 /* We only support one address space per cpu at the moment. */
504 assert(cpu->as == as);
506 if (cpu->tcg_as_listener) {
507 memory_listener_unregister(cpu->tcg_as_listener);
508 } else {
509 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
511 cpu->tcg_as_listener->commit = tcg_commit;
512 memory_listener_register(cpu->tcg_as_listener, as);
514 #endif
/*
 * Register a newly created CPU: assign it the next free cpu_index,
 * initialise its breakpoint/watchpoint lists, append it to the global
 * 'cpus' queue and register its migration state.
 */
516 void cpu_exec_init(CPUArchState *env)
518 CPUState *cpu = ENV_GET_CPU(env);
519 CPUClass *cc = CPU_GET_CLASS(cpu);
520 CPUState *some_cpu;
521 int cpu_index;
/* User-mode builds take the CPU list lock around the list update. */
523 #if defined(CONFIG_USER_ONLY)
524 cpu_list_lock();
525 #endif
/* The new index is the number of CPUs already on the list. */
526 cpu_index = 0;
527 CPU_FOREACH(some_cpu) {
528 cpu_index++;
530 cpu->cpu_index = cpu_index;
531 cpu->numa_node = 0;
532 QTAILQ_INIT(&cpu->breakpoints);
533 QTAILQ_INIT(&cpu->watchpoints);
534 #ifndef CONFIG_USER_ONLY
535 cpu->as = &address_space_memory;
536 cpu->thread_id = qemu_get_thread_id();
537 #endif
538 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
539 #if defined(CONFIG_USER_ONLY)
540 cpu_list_unlock();
541 #endif
/* Fall back to the common description when the device supplies none. */
542 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
543 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
/* Targets defining CPU_SAVE_VERSION use cpu_save/cpu_load and must not
 * also provide a vmsd. */
545 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
546 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
547 cpu_save, cpu_load, env);
548 assert(cc->vmsd == NULL);
549 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
550 #endif
551 if (cc->vmsd != NULL) {
552 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
556 #if defined(CONFIG_USER_ONLY)
557 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
559 tb_invalidate_phys_page_range(pc, pc + 1, 0);
561 #else
562 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
564 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
565 if (phys != -1) {
566 tb_invalidate_phys_addr(cpu->as,
567 phys | (pc & ~TARGET_PAGE_MASK));
570 #endif
572 #if defined(CONFIG_USER_ONLY)
/*
 * User-mode stubs for the watchpoint API: the remove_all / remove_by_ref
 * variants do nothing, insert / remove report -ENOSYS.
 */
573 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
578 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
579 int flags)
581 return -ENOSYS;
584 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
588 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
589 int flags, CPUWatchpoint **watchpoint)
591 return -ENOSYS;
593 #else
594 /* Add a watchpoint. */
595 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
596 int flags, CPUWatchpoint **watchpoint)
598 CPUWatchpoint *wp;
600 /* forbid ranges which are empty or run off the end of the address space */
601 if (len == 0 || (addr + len - 1) < addr) {
602 error_report("tried to set invalid watchpoint at %"
603 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
604 return -EINVAL;
606 wp = g_malloc(sizeof(*wp));
608 wp->vaddr = addr;
609 wp->len = len;
610 wp->flags = flags;
612 /* keep all GDB-injected watchpoints in front */
613 if (flags & BP_GDB) {
614 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
615 } else {
616 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
619 tlb_flush_page(cpu, addr);
621 if (watchpoint)
622 *watchpoint = wp;
623 return 0;
626 /* Remove a specific watchpoint. */
627 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
628 int flags)
630 CPUWatchpoint *wp;
632 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
633 if (addr == wp->vaddr && len == wp->len
634 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
635 cpu_watchpoint_remove_by_ref(cpu, wp);
636 return 0;
639 return -ENOENT;
642 /* Remove a specific watchpoint by reference. */
643 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
645 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
647 tlb_flush_page(cpu, watchpoint->vaddr);
649 g_free(watchpoint);
652 /* Remove all matching watchpoints. */
653 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
655 CPUWatchpoint *wp, *next;
657 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
658 if (wp->flags & mask) {
659 cpu_watchpoint_remove_by_ref(cpu, wp);
664 /* Return true if this watchpoint address matches the specified
665 * access (ie the address range covered by the watchpoint overlaps
666 * partially or completely with the address range covered by the
667 * access).
669 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
670 vaddr addr,
671 vaddr len)
673 /* We know the lengths are non-zero, but a little caution is
674 * required to avoid errors in the case where the range ends
675 * exactly at the top of the address space and so addr + len
676 * wraps round to zero.
678 vaddr wpend = wp->vaddr + wp->len - 1;
679 vaddr addrend = addr + len - 1;
681 return !(addr > wpend || wp->vaddr > addrend);
684 #endif
686 /* Add a breakpoint. */
687 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
688 CPUBreakpoint **breakpoint)
690 CPUBreakpoint *bp;
692 bp = g_malloc(sizeof(*bp));
694 bp->pc = pc;
695 bp->flags = flags;
697 /* keep all GDB-injected breakpoints in front */
698 if (flags & BP_GDB) {
699 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
700 } else {
701 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
704 breakpoint_invalidate(cpu, pc);
706 if (breakpoint) {
707 *breakpoint = bp;
709 return 0;
712 /* Remove a specific breakpoint. */
713 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
715 CPUBreakpoint *bp;
717 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
718 if (bp->pc == pc && bp->flags == flags) {
719 cpu_breakpoint_remove_by_ref(cpu, bp);
720 return 0;
723 return -ENOENT;
726 /* Remove a specific breakpoint by reference. */
727 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
729 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
731 breakpoint_invalidate(cpu, breakpoint->pc);
733 g_free(breakpoint);
736 /* Remove all matching breakpoints. */
737 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
739 CPUBreakpoint *bp, *next;
741 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
742 if (bp->flags & mask) {
743 cpu_breakpoint_remove_by_ref(cpu, bp);
748 /* enable or disable single step mode. EXCP_DEBUG is returned by the
749 CPU loop after each instruction */
750 void cpu_single_step(CPUState *cpu, int enabled)
752 if (cpu->singlestep_enabled != enabled) {
753 cpu->singlestep_enabled = enabled;
754 if (kvm_enabled()) {
755 kvm_update_guest_debug(cpu, 0);
756 } else {
757 /* must flush all the translated code to avoid inconsistencies */
758 /* XXX: only flush what is necessary */
759 CPUArchState *env = cpu->env_ptr;
760 tb_flush(env);
765 void cpu_abort(CPUState *cpu, const char *fmt, ...)
767 va_list ap;
768 va_list ap2;
770 va_start(ap, fmt);
771 va_copy(ap2, ap);
772 fprintf(stderr, "qemu: fatal: ");
773 vfprintf(stderr, fmt, ap);
774 fprintf(stderr, "\n");
775 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
776 if (qemu_log_enabled()) {
777 qemu_log("qemu: fatal: ");
778 qemu_log_vprintf(fmt, ap2);
779 qemu_log("\n");
780 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
781 qemu_log_flush();
782 qemu_log_close();
784 va_end(ap2);
785 va_end(ap);
786 #if defined(CONFIG_USER_ONLY)
788 struct sigaction act;
789 sigfillset(&act.sa_mask);
790 act.sa_handler = SIG_DFL;
791 sigaction(SIGABRT, &act, NULL);
793 #endif
794 abort();
797 #if !defined(CONFIG_USER_ONLY)
798 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
800 RAMBlock *block;
802 /* The list is protected by the iothread lock here. */
803 block = ram_list.mru_block;
804 if (block && addr - block->offset < block->max_length) {
805 goto found;
807 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
808 if (addr - block->offset < block->max_length) {
809 goto found;
813 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
814 abort();
816 found:
817 ram_list.mru_block = block;
818 return block;
821 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
823 ram_addr_t start1;
824 RAMBlock *block;
825 ram_addr_t end;
827 end = TARGET_PAGE_ALIGN(start + length);
828 start &= TARGET_PAGE_MASK;
830 block = qemu_get_ram_block(start);
831 assert(block == qemu_get_ram_block(end - 1));
832 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
833 cpu_tlb_reset_dirty_all(start1, length);
836 /* Note: start and end must be within the same ram block. */
837 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
838 unsigned client)
840 if (length == 0)
841 return;
842 cpu_physical_memory_clear_dirty_range_type(start, length, client);
844 if (tcg_enabled()) {
845 tlb_reset_dirty_range_all(start, length);
849 static void cpu_physical_memory_set_dirty_tracking(bool enable)
851 in_migration = enable;
/*
 * Compute the iotlb value for a page backed by @section.
 *
 * For RAM the value is the page-aligned ram address plus @xlat, ORed
 * with the NOTDIRTY or ROM section number; otherwise it is the section's
 * index in the dispatch map plus @xlat.  If a watchpoint overlaps the
 * page at @vaddr, the value is replaced so accesses are routed through
 * the watch section and TLB_MMIO is set in *@address.
 */
854 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
855 MemoryRegionSection *section,
856 target_ulong vaddr,
857 hwaddr paddr, hwaddr xlat,
858 int prot,
859 target_ulong *address)
861 hwaddr iotlb;
862 CPUWatchpoint *wp;
864 if (memory_region_is_ram(section->mr)) {
865 /* Normal RAM. */
866 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
867 + xlat;
/* The section number lives in the low (sub-page) bits of the value. */
868 if (!section->readonly) {
869 iotlb |= PHYS_SECTION_NOTDIRTY;
870 } else {
871 iotlb |= PHYS_SECTION_ROM;
873 } else {
/* Pointer difference yields the section's index in map.sections. */
874 iotlb = section - section->address_space->dispatch->map.sections;
875 iotlb += xlat;
878 /* Make accesses to pages with watchpoints go via the
879 watchpoint trap routines. */
880 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
881 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
882 /* Avoid trapping reads of pages with a write breakpoint. */
883 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
884 iotlb = PHYS_SECTION_WATCH + paddr;
885 *address |= TLB_MMIO;
886 break;
891 return iotlb;
893 #endif /* !defined(CONFIG_USER_ONLY) */
895 #if !defined(CONFIG_USER_ONLY)
897 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
898 uint16_t section);
899 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
/*
 * Allocator used for guest RAM; defaults to qemu_anon_ram_alloc and can
 * be replaced through phys_mem_set_alloc().
 */
901 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
902 qemu_anon_ram_alloc;
905 * Set a custom physical guest memory alloator.
906 * Accelerators with unusual needs may need this. Hopefully, we can
907 * get rid of it eventually.
909 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
911 phys_mem_alloc = alloc;
914 static uint16_t phys_section_add(PhysPageMap *map,
915 MemoryRegionSection *section)
917 /* The physical section number is ORed with a page-aligned
918 * pointer to produce the iotlb entries. Thus it should
919 * never overflow into the page-aligned value.
921 assert(map->sections_nb < TARGET_PAGE_SIZE);
923 if (map->sections_nb == map->sections_nb_alloc) {
924 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
925 map->sections = g_renew(MemoryRegionSection, map->sections,
926 map->sections_nb_alloc);
928 map->sections[map->sections_nb] = *section;
929 memory_region_ref(section->mr);
930 return map->sections_nb++;
933 static void phys_section_destroy(MemoryRegion *mr)
935 memory_region_unref(mr);
937 if (mr->subpage) {
938 subpage_t *subpage = container_of(mr, subpage_t, iomem);
939 object_unref(OBJECT(&subpage->iomem));
940 g_free(subpage);
944 static void phys_sections_free(PhysPageMap *map)
946 while (map->sections_nb > 0) {
947 MemoryRegionSection *section = &map->sections[--map->sections_nb];
948 phys_section_destroy(section->mr);
950 g_free(map->sections);
951 g_free(map->nodes);
/*
 * Register @section, which covers less than a full page, inside the
 * subpage container for its page, creating that container when the
 * page is still unassigned.
 */
954 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
956 subpage_t *subpage;
/* Start of the page containing the section. */
957 hwaddr base = section->offset_within_address_space
958 & TARGET_PAGE_MASK;
959 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
960 d->map.nodes, d->map.sections);
/* Page-sized section describing the subpage container itself. */
961 MemoryRegionSection subsection = {
962 .offset_within_address_space = base,
963 .size = int128_make64(TARGET_PAGE_SIZE),
965 hwaddr start, end;
/* The page must be either already split or completely unassigned. */
967 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
969 if (!(existing->mr->subpage)) {
970 subpage = subpage_init(d->as, base);
971 subsection.address_space = d->as;
972 subsection.mr = &subpage->iomem;
973 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
974 phys_section_add(&d->map, &subsection));
975 } else {
976 subpage = container_of(existing->mr, subpage_t, iomem);
/* Inclusive byte range of the section within the page. */
978 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
979 end = start + int128_get64(section->size) - 1;
980 subpage_register(subpage, start, end,
981 phys_section_add(&d->map, section));
985 static void register_multipage(AddressSpaceDispatch *d,
986 MemoryRegionSection *section)
988 hwaddr start_addr = section->offset_within_address_space;
989 uint16_t section_index = phys_section_add(&d->map, section);
990 uint64_t num_pages = int128_get64(int128_rshift(section->size,
991 TARGET_PAGE_BITS));
993 assert(num_pages);
994 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
/*
 * Listener callback: add @section to the address space's next dispatch
 * map.  The section is cut into an unaligned head (subpage), whole pages
 * (multipage) and a sub-page tail (subpage) as required.
 */
997 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
999 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1000 AddressSpaceDispatch *d = as->next_dispatch;
/* 'now' is the piece being registered, 'remain' what is left to do. */
1001 MemoryRegionSection now = *section, remain = *section;
1002 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
/* Unaligned start: register the part up to the next page boundary. */
1004 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1005 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1006 - now.offset_within_address_space;
1008 now.size = int128_min(int128_make64(left), now.size);
1009 register_subpage(d, &now);
1010 } else {
1011 now.size = int128_zero();
/* Loop until the piece just registered equals everything remaining. */
1013 while (int128_ne(remain.size, now.size)) {
1014 remain.size = int128_sub(remain.size, now.size);
1015 remain.offset_within_address_space += int128_get64(now.size);
1016 remain.offset_within_region += int128_get64(now.size);
1017 now = remain;
1018 if (int128_lt(remain.size, page_size)) {
1019 register_subpage(d, &now);
1020 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1021 now.size = page_size;
1022 register_subpage(d, &now);
1023 } else {
/* Round the size down to a whole number of pages. */
1024 now.size = int128_and(now.size, int128_neg(page_size));
1025 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1036 void qemu_mutex_lock_ramlist(void)
1038 qemu_mutex_lock(&ram_list.mutex);
1041 void qemu_mutex_unlock_ramlist(void)
1043 qemu_mutex_unlock(&ram_list.mutex);
1046 #ifdef __linux__
1048 #include <sys/vfs.h>
1050 #define HUGETLBFS_MAGIC 0x958458f6
1052 static long gethugepagesize(const char *path, Error **errp)
1054 struct statfs fs;
1055 int ret;
1057 do {
1058 ret = statfs(path, &fs);
1059 } while (ret != 0 && errno == EINTR);
1061 if (ret != 0) {
1062 error_setg_errno(errp, errno, "failed to get page size of file %s",
1063 path);
1064 return 0;
1067 if (fs.f_type != HUGETLBFS_MAGIC)
1068 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1070 return fs.f_bsize;
1073 static void *file_ram_alloc(RAMBlock *block,
1074 ram_addr_t memory,
1075 const char *path,
1076 Error **errp)
1078 char *filename;
1079 char *sanitized_name;
1080 char *c;
1081 void *area = NULL;
1082 int fd;
1083 uint64_t hpagesize;
1084 Error *local_err = NULL;
1086 hpagesize = gethugepagesize(path, &local_err);
1087 if (local_err) {
1088 error_propagate(errp, local_err);
1089 goto error;
1091 block->mr->align = hpagesize;
1093 if (memory < hpagesize) {
1094 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1095 "or larger than huge page size 0x%" PRIx64,
1096 memory, hpagesize);
1097 goto error;
1100 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1101 error_setg(errp,
1102 "host lacks kvm mmu notifiers, -mem-path unsupported");
1103 goto error;
1106 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1107 sanitized_name = g_strdup(memory_region_name(block->mr));
1108 for (c = sanitized_name; *c != '\0'; c++) {
1109 if (*c == '/')
1110 *c = '_';
1113 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1114 sanitized_name);
1115 g_free(sanitized_name);
1117 fd = mkstemp(filename);
1118 if (fd < 0) {
1119 error_setg_errno(errp, errno,
1120 "unable to create backing store for hugepages");
1121 g_free(filename);
1122 goto error;
1124 unlink(filename);
1125 g_free(filename);
1127 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1130 * ftruncate is not supported by hugetlbfs in older
1131 * hosts, so don't bother bailing out on errors.
1132 * If anything goes wrong with it under other filesystems,
1133 * mmap will fail.
1135 if (ftruncate(fd, memory)) {
1136 perror("ftruncate");
1139 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1140 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1141 fd, 0);
1142 if (area == MAP_FAILED) {
1143 error_setg_errno(errp, errno,
1144 "unable to map backing store for hugepages");
1145 close(fd);
1146 goto error;
1149 if (mem_prealloc) {
1150 os_mem_prealloc(fd, area, memory);
1153 block->fd = fd;
1154 return area;
1156 error:
1157 if (mem_prealloc) {
1158 error_report("%s\n", error_get_pretty(*errp));
1159 exit(1);
1161 return NULL;
1163 #endif
1165 static ram_addr_t find_ram_offset(ram_addr_t size)
1167 RAMBlock *block, *next_block;
1168 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1170 assert(size != 0); /* it would hand out same offset multiple times */
1172 if (QTAILQ_EMPTY(&ram_list.blocks))
1173 return 0;
1175 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1176 ram_addr_t end, next = RAM_ADDR_MAX;
1178 end = block->offset + block->max_length;
1180 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1181 if (next_block->offset >= end) {
1182 next = MIN(next, next_block->offset);
1185 if (next - end >= size && next - end < mingap) {
1186 offset = end;
1187 mingap = next - end;
1191 if (offset == RAM_ADDR_MAX) {
1192 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1193 (uint64_t)size);
1194 abort();
1197 return offset;
1200 ram_addr_t last_ram_offset(void)
1202 RAMBlock *block;
1203 ram_addr_t last = 0;
1205 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1206 last = MAX(last, block->offset + block->max_length);
1208 return last;
1211 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1213 int ret;
1215 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1216 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1217 "dump-guest-core", true)) {
1218 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1219 if (ret) {
1220 perror("qemu_madvise");
1221 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1222 "but dump_guest_core=off specified\n");
1227 static RAMBlock *find_ram_block(ram_addr_t addr)
1229 RAMBlock *block;
1231 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1232 if (block->offset == addr) {
1233 return block;
1237 return NULL;
1240 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1242 RAMBlock *new_block = find_ram_block(addr);
1243 RAMBlock *block;
1245 assert(new_block);
1246 assert(!new_block->idstr[0]);
1248 if (dev) {
1249 char *id = qdev_get_dev_path(dev);
1250 if (id) {
1251 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1252 g_free(id);
1255 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1257 /* This assumes the iothread lock is taken here too. */
1258 qemu_mutex_lock_ramlist();
1259 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1260 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1261 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1262 new_block->idstr);
1263 abort();
1266 qemu_mutex_unlock_ramlist();
1269 void qemu_ram_unset_idstr(ram_addr_t addr)
1271 RAMBlock *block = find_ram_block(addr);
1273 if (block) {
1274 memset(block->idstr, 0, sizeof(block->idstr));
1278 static int memory_try_enable_merging(void *addr, size_t len)
1280 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1281 /* disabled by the user */
1282 return 0;
1285 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1288 /* Only legal before guest might have detected the memory size: e.g. on
1289 * incoming migration, or right after reset.
1291 * As memory core doesn't know how is memory accessed, it is up to
1292 * resize callback to update device state and/or add assertions to detect
1293 * misuse, if necessary.
1295 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1297 RAMBlock *block = find_ram_block(base);
1299 assert(block);
1301 if (block->used_length == newsize) {
1302 return 0;
1305 if (!(block->flags & RAM_RESIZEABLE)) {
1306 error_setg_errno(errp, EINVAL,
1307 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1308 " in != 0x" RAM_ADDR_FMT, block->idstr,
1309 newsize, block->used_length);
1310 return -EINVAL;
1313 if (block->max_length < newsize) {
1314 error_setg_errno(errp, EINVAL,
1315 "Length too large: %s: 0x" RAM_ADDR_FMT
1316 " > 0x" RAM_ADDR_FMT, block->idstr,
1317 newsize, block->max_length);
1318 return -EINVAL;
1321 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1322 block->used_length = newsize;
1323 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1324 memory_region_set_size(block->mr, newsize);
1325 if (block->resized) {
1326 block->resized(block->idstr, newsize, block->host);
1328 return 0;
1331 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1333 RAMBlock *block;
1334 ram_addr_t old_ram_size, new_ram_size;
1336 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1338 /* This assumes the iothread lock is taken here too. */
1339 qemu_mutex_lock_ramlist();
1340 new_block->offset = find_ram_offset(new_block->max_length);
1342 if (!new_block->host) {
1343 if (xen_enabled()) {
1344 xen_ram_alloc(new_block->offset, new_block->max_length,
1345 new_block->mr);
1346 } else {
1347 new_block->host = phys_mem_alloc(new_block->max_length,
1348 &new_block->mr->align);
1349 if (!new_block->host) {
1350 error_setg_errno(errp, errno,
1351 "cannot set up guest memory '%s'",
1352 memory_region_name(new_block->mr));
1353 qemu_mutex_unlock_ramlist();
1354 return -1;
1356 memory_try_enable_merging(new_block->host, new_block->max_length);
1360 /* Keep the list sorted from biggest to smallest block. */
1361 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1362 if (block->max_length < new_block->max_length) {
1363 break;
1366 if (block) {
1367 QTAILQ_INSERT_BEFORE(block, new_block, next);
1368 } else {
1369 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1371 ram_list.mru_block = NULL;
1373 ram_list.version++;
1374 qemu_mutex_unlock_ramlist();
1376 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1378 if (new_ram_size > old_ram_size) {
1379 int i;
1380 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1381 ram_list.dirty_memory[i] =
1382 bitmap_zero_extend(ram_list.dirty_memory[i],
1383 old_ram_size, new_ram_size);
1386 cpu_physical_memory_set_dirty_range(new_block->offset,
1387 new_block->used_length);
1389 if (new_block->host) {
1390 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1391 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1392 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1393 if (kvm_enabled()) {
1394 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1398 return new_block->offset;
1401 #ifdef __linux__
1402 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1403 bool share, const char *mem_path,
1404 Error **errp)
1406 RAMBlock *new_block;
1407 ram_addr_t addr;
1408 Error *local_err = NULL;
1410 if (xen_enabled()) {
1411 error_setg(errp, "-mem-path not supported with Xen");
1412 return -1;
1415 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1417 * file_ram_alloc() needs to allocate just like
1418 * phys_mem_alloc, but we haven't bothered to provide
1419 * a hook there.
1421 error_setg(errp,
1422 "-mem-path not supported with this accelerator");
1423 return -1;
1426 size = TARGET_PAGE_ALIGN(size);
1427 new_block = g_malloc0(sizeof(*new_block));
1428 new_block->mr = mr;
1429 new_block->used_length = size;
1430 new_block->max_length = size;
1431 new_block->flags = share ? RAM_SHARED : 0;
1432 new_block->host = file_ram_alloc(new_block, size,
1433 mem_path, errp);
1434 if (!new_block->host) {
1435 g_free(new_block);
1436 return -1;
1439 addr = ram_block_add(new_block, &local_err);
1440 if (local_err) {
1441 g_free(new_block);
1442 error_propagate(errp, local_err);
1443 return -1;
1445 return addr;
1447 #endif
1449 static
1450 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1451 void (*resized)(const char*,
1452 uint64_t length,
1453 void *host),
1454 void *host, bool resizeable,
1455 MemoryRegion *mr, Error **errp)
1457 RAMBlock *new_block;
1458 ram_addr_t addr;
1459 Error *local_err = NULL;
1461 size = TARGET_PAGE_ALIGN(size);
1462 max_size = TARGET_PAGE_ALIGN(max_size);
1463 new_block = g_malloc0(sizeof(*new_block));
1464 new_block->mr = mr;
1465 new_block->resized = resized;
1466 new_block->used_length = size;
1467 new_block->max_length = max_size;
1468 assert(max_size >= size);
1469 new_block->fd = -1;
1470 new_block->host = host;
1471 if (host) {
1472 new_block->flags |= RAM_PREALLOC;
1474 if (resizeable) {
1475 new_block->flags |= RAM_RESIZEABLE;
1477 addr = ram_block_add(new_block, &local_err);
1478 if (local_err) {
1479 g_free(new_block);
1480 error_propagate(errp, local_err);
1481 return -1;
1483 return addr;
1486 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1487 MemoryRegion *mr, Error **errp)
1489 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1492 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1494 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1497 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1498 void (*resized)(const char*,
1499 uint64_t length,
1500 void *host),
1501 MemoryRegion *mr, Error **errp)
1503 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1506 void qemu_ram_free_from_ptr(ram_addr_t addr)
1508 RAMBlock *block;
1510 /* This assumes the iothread lock is taken here too. */
1511 qemu_mutex_lock_ramlist();
1512 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1513 if (addr == block->offset) {
1514 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1515 ram_list.mru_block = NULL;
1516 ram_list.version++;
1517 g_free(block);
1518 break;
1521 qemu_mutex_unlock_ramlist();
1524 void qemu_ram_free(ram_addr_t addr)
1526 RAMBlock *block;
1528 /* This assumes the iothread lock is taken here too. */
1529 qemu_mutex_lock_ramlist();
1530 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1531 if (addr == block->offset) {
1532 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1533 ram_list.mru_block = NULL;
1534 ram_list.version++;
1535 if (block->flags & RAM_PREALLOC) {
1537 } else if (xen_enabled()) {
1538 xen_invalidate_map_cache_entry(block->host);
1539 #ifndef _WIN32
1540 } else if (block->fd >= 0) {
1541 munmap(block->host, block->max_length);
1542 close(block->fd);
1543 #endif
1544 } else {
1545 qemu_anon_ram_free(block->host, block->max_length);
1547 g_free(block);
1548 break;
1551 qemu_mutex_unlock_ramlist();
1555 #ifndef _WIN32
1556 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1558 RAMBlock *block;
1559 ram_addr_t offset;
1560 int flags;
1561 void *area, *vaddr;
1563 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1564 offset = addr - block->offset;
1565 if (offset < block->max_length) {
1566 vaddr = ramblock_ptr(block, offset);
1567 if (block->flags & RAM_PREALLOC) {
1569 } else if (xen_enabled()) {
1570 abort();
1571 } else {
1572 flags = MAP_FIXED;
1573 munmap(vaddr, length);
1574 if (block->fd >= 0) {
1575 flags |= (block->flags & RAM_SHARED ?
1576 MAP_SHARED : MAP_PRIVATE);
1577 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1578 flags, block->fd, offset);
1579 } else {
1581 * Remap needs to match alloc. Accelerators that
1582 * set phys_mem_alloc never remap. If they did,
1583 * we'd need a remap hook here.
1585 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1587 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1588 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1589 flags, -1, 0);
1591 if (area != vaddr) {
1592 fprintf(stderr, "Could not remap addr: "
1593 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1594 length, addr);
1595 exit(1);
1597 memory_try_enable_merging(vaddr, length);
1598 qemu_ram_setup_dump(vaddr, length);
1600 return;
1604 #endif /* !_WIN32 */
1606 int qemu_get_ram_fd(ram_addr_t addr)
1608 RAMBlock *block = qemu_get_ram_block(addr);
1610 return block->fd;
1613 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1615 RAMBlock *block = qemu_get_ram_block(addr);
1617 return ramblock_ptr(block, 0);
1620 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1621 With the exception of the softmmu code in this file, this should
1622 only be used for local memory (e.g. video ram) that the device owns,
1623 and knows it isn't going to access beyond the end of the block.
1625 It should not be used for general purpose DMA.
1626 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1628 void *qemu_get_ram_ptr(ram_addr_t addr)
1630 RAMBlock *block = qemu_get_ram_block(addr);
1632 if (xen_enabled()) {
1633 /* We need to check if the requested address is in the RAM
1634 * because we don't want to map the entire memory in QEMU.
1635 * In that case just map until the end of the page.
1637 if (block->offset == 0) {
1638 return xen_map_cache(addr, 0, 0);
1639 } else if (block->host == NULL) {
1640 block->host =
1641 xen_map_cache(block->offset, block->max_length, 1);
1644 return ramblock_ptr(block, addr - block->offset);
1647 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1648 * but takes a size argument */
1649 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1651 if (*size == 0) {
1652 return NULL;
1654 if (xen_enabled()) {
1655 return xen_map_cache(addr, *size, 1);
1656 } else {
1657 RAMBlock *block;
1659 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1660 if (addr - block->offset < block->max_length) {
1661 if (addr - block->offset + *size > block->max_length)
1662 *size = block->max_length - addr + block->offset;
1663 return ramblock_ptr(block, addr - block->offset);
1667 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1668 abort();
1672 /* Some of the softmmu routines need to translate from a host pointer
1673 (typically a TLB entry) back to a ram offset. */
1674 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1676 RAMBlock *block;
1677 uint8_t *host = ptr;
1679 if (xen_enabled()) {
1680 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1681 return qemu_get_ram_block(*ram_addr)->mr;
1684 block = ram_list.mru_block;
1685 if (block && block->host && host - block->host < block->max_length) {
1686 goto found;
1689 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1690 /* This case append when the block is not mapped. */
1691 if (block->host == NULL) {
1692 continue;
1694 if (host - block->host < block->max_length) {
1695 goto found;
1699 return NULL;
1701 found:
1702 *ram_addr = block->offset + (host - block->host);
1703 return block->mr;
1706 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1707 uint64_t val, unsigned size)
1709 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1710 tb_invalidate_phys_page_fast(ram_addr, size);
1712 switch (size) {
1713 case 1:
1714 stb_p(qemu_get_ram_ptr(ram_addr), val);
1715 break;
1716 case 2:
1717 stw_p(qemu_get_ram_ptr(ram_addr), val);
1718 break;
1719 case 4:
1720 stl_p(qemu_get_ram_ptr(ram_addr), val);
1721 break;
1722 default:
1723 abort();
1725 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1726 /* we remove the notdirty callback only if the code has been
1727 flushed */
1728 if (!cpu_physical_memory_is_clean(ram_addr)) {
1729 CPUArchState *env = current_cpu->env_ptr;
1730 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1734 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1735 unsigned size, bool is_write)
1737 return is_write;
1740 static const MemoryRegionOps notdirty_mem_ops = {
1741 .write = notdirty_mem_write,
1742 .valid.accepts = notdirty_mem_accepts,
1743 .endianness = DEVICE_NATIVE_ENDIAN,
1746 /* Generate a debug exception if a watchpoint has been hit. */
1747 static void check_watchpoint(int offset, int len, int flags)
1749 CPUState *cpu = current_cpu;
1750 CPUArchState *env = cpu->env_ptr;
1751 target_ulong pc, cs_base;
1752 target_ulong vaddr;
1753 CPUWatchpoint *wp;
1754 int cpu_flags;
1756 if (cpu->watchpoint_hit) {
1757 /* We re-entered the check after replacing the TB. Now raise
1758 * the debug interrupt so that is will trigger after the
1759 * current instruction. */
1760 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1761 return;
1763 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1764 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1765 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1766 && (wp->flags & flags)) {
1767 if (flags == BP_MEM_READ) {
1768 wp->flags |= BP_WATCHPOINT_HIT_READ;
1769 } else {
1770 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1772 wp->hitaddr = vaddr;
1773 if (!cpu->watchpoint_hit) {
1774 cpu->watchpoint_hit = wp;
1775 tb_check_watchpoint(cpu);
1776 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1777 cpu->exception_index = EXCP_DEBUG;
1778 cpu_loop_exit(cpu);
1779 } else {
1780 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1781 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1782 cpu_resume_from_signal(cpu, NULL);
1785 } else {
1786 wp->flags &= ~BP_WATCHPOINT_HIT;
1791 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1792 so these check for a hit then pass through to the normal out-of-line
1793 phys routines. */
1794 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1795 unsigned size)
1797 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1798 switch (size) {
1799 case 1: return ldub_phys(&address_space_memory, addr);
1800 case 2: return lduw_phys(&address_space_memory, addr);
1801 case 4: return ldl_phys(&address_space_memory, addr);
1802 default: abort();
1806 static void watch_mem_write(void *opaque, hwaddr addr,
1807 uint64_t val, unsigned size)
1809 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1810 switch (size) {
1811 case 1:
1812 stb_phys(&address_space_memory, addr, val);
1813 break;
1814 case 2:
1815 stw_phys(&address_space_memory, addr, val);
1816 break;
1817 case 4:
1818 stl_phys(&address_space_memory, addr, val);
1819 break;
1820 default: abort();
1824 static const MemoryRegionOps watch_mem_ops = {
1825 .read = watch_mem_read,
1826 .write = watch_mem_write,
1827 .endianness = DEVICE_NATIVE_ENDIAN,
1830 static uint64_t subpage_read(void *opaque, hwaddr addr,
1831 unsigned len)
1833 subpage_t *subpage = opaque;
1834 uint8_t buf[8];
1836 #if defined(DEBUG_SUBPAGE)
1837 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1838 subpage, len, addr);
1839 #endif
1840 address_space_read(subpage->as, addr + subpage->base, buf, len);
1841 switch (len) {
1842 case 1:
1843 return ldub_p(buf);
1844 case 2:
1845 return lduw_p(buf);
1846 case 4:
1847 return ldl_p(buf);
1848 case 8:
1849 return ldq_p(buf);
1850 default:
1851 abort();
1855 static void subpage_write(void *opaque, hwaddr addr,
1856 uint64_t value, unsigned len)
1858 subpage_t *subpage = opaque;
1859 uint8_t buf[8];
1861 #if defined(DEBUG_SUBPAGE)
1862 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1863 " value %"PRIx64"\n",
1864 __func__, subpage, len, addr, value);
1865 #endif
1866 switch (len) {
1867 case 1:
1868 stb_p(buf, value);
1869 break;
1870 case 2:
1871 stw_p(buf, value);
1872 break;
1873 case 4:
1874 stl_p(buf, value);
1875 break;
1876 case 8:
1877 stq_p(buf, value);
1878 break;
1879 default:
1880 abort();
1882 address_space_write(subpage->as, addr + subpage->base, buf, len);
1885 static bool subpage_accepts(void *opaque, hwaddr addr,
1886 unsigned len, bool is_write)
1888 subpage_t *subpage = opaque;
1889 #if defined(DEBUG_SUBPAGE)
1890 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1891 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1892 #endif
1894 return address_space_access_valid(subpage->as, addr + subpage->base,
1895 len, is_write);
1898 static const MemoryRegionOps subpage_ops = {
1899 .read = subpage_read,
1900 .write = subpage_write,
1901 .impl.min_access_size = 1,
1902 .impl.max_access_size = 8,
1903 .valid.min_access_size = 1,
1904 .valid.max_access_size = 8,
1905 .valid.accepts = subpage_accepts,
1906 .endianness = DEVICE_NATIVE_ENDIAN,
1909 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1910 uint16_t section)
1912 int idx, eidx;
1914 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1915 return -1;
1916 idx = SUBPAGE_IDX(start);
1917 eidx = SUBPAGE_IDX(end);
1918 #if defined(DEBUG_SUBPAGE)
1919 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1920 __func__, mmio, start, end, idx, eidx, section);
1921 #endif
1922 for (; idx <= eidx; idx++) {
1923 mmio->sub_section[idx] = section;
1926 return 0;
1929 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1931 subpage_t *mmio;
1933 mmio = g_malloc0(sizeof(subpage_t));
1935 mmio->as = as;
1936 mmio->base = base;
1937 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1938 NULL, TARGET_PAGE_SIZE);
1939 mmio->iomem.subpage = true;
1940 #if defined(DEBUG_SUBPAGE)
1941 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1942 mmio, base, TARGET_PAGE_SIZE);
1943 #endif
1944 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1946 return mmio;
1949 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1950 MemoryRegion *mr)
1952 assert(as);
1953 MemoryRegionSection section = {
1954 .address_space = as,
1955 .mr = mr,
1956 .offset_within_address_space = 0,
1957 .offset_within_region = 0,
1958 .size = int128_2_64(),
1961 return phys_section_add(map, &section);
1964 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1966 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1969 static void io_mem_init(void)
1971 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1972 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1973 NULL, UINT64_MAX);
1974 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1975 NULL, UINT64_MAX);
1976 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1977 NULL, UINT64_MAX);
1980 static void mem_begin(MemoryListener *listener)
1982 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1983 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1984 uint16_t n;
1986 n = dummy_section(&d->map, as, &io_mem_unassigned);
1987 assert(n == PHYS_SECTION_UNASSIGNED);
1988 n = dummy_section(&d->map, as, &io_mem_notdirty);
1989 assert(n == PHYS_SECTION_NOTDIRTY);
1990 n = dummy_section(&d->map, as, &io_mem_rom);
1991 assert(n == PHYS_SECTION_ROM);
1992 n = dummy_section(&d->map, as, &io_mem_watch);
1993 assert(n == PHYS_SECTION_WATCH);
1995 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1996 d->as = as;
1997 as->next_dispatch = d;
2000 static void mem_commit(MemoryListener *listener)
2002 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2003 AddressSpaceDispatch *cur = as->dispatch;
2004 AddressSpaceDispatch *next = as->next_dispatch;
2006 phys_page_compact_all(next, next->map.nodes_nb);
2008 as->dispatch = next;
2010 if (cur) {
2011 phys_sections_free(&cur->map);
2012 g_free(cur);
2016 static void tcg_commit(MemoryListener *listener)
2018 CPUState *cpu;
2020 /* since each CPU stores ram addresses in its TLB cache, we must
2021 reset the modified entries */
2022 /* XXX: slow ! */
2023 CPU_FOREACH(cpu) {
2024 /* FIXME: Disentangle the cpu.h circular files deps so we can
2025 directly get the right CPU from listener. */
2026 if (cpu->tcg_as_listener != listener) {
2027 continue;
2029 tlb_flush(cpu, 1);
2033 static void core_log_global_start(MemoryListener *listener)
2035 cpu_physical_memory_set_dirty_tracking(true);
2038 static void core_log_global_stop(MemoryListener *listener)
2040 cpu_physical_memory_set_dirty_tracking(false);
2043 static MemoryListener core_memory_listener = {
2044 .log_global_start = core_log_global_start,
2045 .log_global_stop = core_log_global_stop,
2046 .priority = 1,
2049 void address_space_init_dispatch(AddressSpace *as)
2051 as->dispatch = NULL;
2052 as->dispatch_listener = (MemoryListener) {
2053 .begin = mem_begin,
2054 .commit = mem_commit,
2055 .region_add = mem_add,
2056 .region_nop = mem_add,
2057 .priority = 0,
2059 memory_listener_register(&as->dispatch_listener, as);
2062 void address_space_destroy_dispatch(AddressSpace *as)
2064 AddressSpaceDispatch *d = as->dispatch;
2066 memory_listener_unregister(&as->dispatch_listener);
2067 g_free(d);
2068 as->dispatch = NULL;
2071 static void memory_map_init(void)
2073 system_memory = g_malloc(sizeof(*system_memory));
2075 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2076 address_space_init(&address_space_memory, system_memory, "memory");
2078 system_io = g_malloc(sizeof(*system_io));
2079 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2080 65536);
2081 address_space_init(&address_space_io, system_io, "I/O");
2083 memory_listener_register(&core_memory_listener, &address_space_memory);
2086 MemoryRegion *get_system_memory(void)
2088 return system_memory;
2091 MemoryRegion *get_system_io(void)
2093 return system_io;
2096 #endif /* !defined(CONFIG_USER_ONLY) */
2098 /* physical memory access (slow version, mainly for debug) */
2099 #if defined(CONFIG_USER_ONLY)
2100 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2101 uint8_t *buf, int len, int is_write)
2103 int l, flags;
2104 target_ulong page;
2105 void * p;
2107 while (len > 0) {
2108 page = addr & TARGET_PAGE_MASK;
2109 l = (page + TARGET_PAGE_SIZE) - addr;
2110 if (l > len)
2111 l = len;
2112 flags = page_get_flags(page);
2113 if (!(flags & PAGE_VALID))
2114 return -1;
2115 if (is_write) {
2116 if (!(flags & PAGE_WRITE))
2117 return -1;
2118 /* XXX: this code should not depend on lock_user */
2119 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2120 return -1;
2121 memcpy(p, buf, l);
2122 unlock_user(p, addr, l);
2123 } else {
2124 if (!(flags & PAGE_READ))
2125 return -1;
2126 /* XXX: this code should not depend on lock_user */
2127 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2128 return -1;
2129 memcpy(buf, p, l);
2130 unlock_user(p, addr, 0);
2132 len -= l;
2133 buf += l;
2134 addr += l;
2136 return 0;
2139 #else
2141 static void invalidate_and_set_dirty(hwaddr addr,
2142 hwaddr length)
2144 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2145 tb_invalidate_phys_range(addr, addr + length, 0);
2146 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2148 xen_modified_memory(addr, length);
2151 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2153 unsigned access_size_max = mr->ops->valid.max_access_size;
2155 /* Regions are assumed to support 1-4 byte accesses unless
2156 otherwise specified. */
2157 if (access_size_max == 0) {
2158 access_size_max = 4;
2161 /* Bound the maximum access by the alignment of the address. */
2162 if (!mr->ops->impl.unaligned) {
2163 unsigned align_size_max = addr & -addr;
2164 if (align_size_max != 0 && align_size_max < access_size_max) {
2165 access_size_max = align_size_max;
2169 /* Don't attempt accesses larger than the maximum. */
2170 if (l > access_size_max) {
2171 l = access_size_max;
2173 if (l & (l - 1)) {
2174 l = 1 << (qemu_fls(l) - 1);
2177 return l;
2180 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2181 int len, bool is_write)
2183 hwaddr l;
2184 uint8_t *ptr;
2185 uint64_t val;
2186 hwaddr addr1;
2187 MemoryRegion *mr;
2188 bool error = false;
2190 while (len > 0) {
2191 l = len;
2192 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2194 if (is_write) {
2195 if (!memory_access_is_direct(mr, is_write)) {
2196 l = memory_access_size(mr, l, addr1);
2197 /* XXX: could force current_cpu to NULL to avoid
2198 potential bugs */
2199 switch (l) {
2200 case 8:
2201 /* 64 bit write access */
2202 val = ldq_p(buf);
2203 error |= io_mem_write(mr, addr1, val, 8);
2204 break;
2205 case 4:
2206 /* 32 bit write access */
2207 val = ldl_p(buf);
2208 error |= io_mem_write(mr, addr1, val, 4);
2209 break;
2210 case 2:
2211 /* 16 bit write access */
2212 val = lduw_p(buf);
2213 error |= io_mem_write(mr, addr1, val, 2);
2214 break;
2215 case 1:
2216 /* 8 bit write access */
2217 val = ldub_p(buf);
2218 error |= io_mem_write(mr, addr1, val, 1);
2219 break;
2220 default:
2221 abort();
2223 } else {
2224 addr1 += memory_region_get_ram_addr(mr);
2225 /* RAM case */
2226 ptr = qemu_get_ram_ptr(addr1);
2227 memcpy(ptr, buf, l);
2228 invalidate_and_set_dirty(addr1, l);
2230 } else {
2231 if (!memory_access_is_direct(mr, is_write)) {
2232 /* I/O case */
2233 l = memory_access_size(mr, l, addr1);
2234 switch (l) {
2235 case 8:
2236 /* 64 bit read access */
2237 error |= io_mem_read(mr, addr1, &val, 8);
2238 stq_p(buf, val);
2239 break;
2240 case 4:
2241 /* 32 bit read access */
2242 error |= io_mem_read(mr, addr1, &val, 4);
2243 stl_p(buf, val);
2244 break;
2245 case 2:
2246 /* 16 bit read access */
2247 error |= io_mem_read(mr, addr1, &val, 2);
2248 stw_p(buf, val);
2249 break;
2250 case 1:
2251 /* 8 bit read access */
2252 error |= io_mem_read(mr, addr1, &val, 1);
2253 stb_p(buf, val);
2254 break;
2255 default:
2256 abort();
2258 } else {
2259 /* RAM case */
2260 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2261 memcpy(buf, ptr, l);
2264 len -= l;
2265 buf += l;
2266 addr += l;
2269 return error;
2272 bool address_space_write(AddressSpace *as, hwaddr addr,
2273 const uint8_t *buf, int len)
2275 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2278 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2280 return address_space_rw(as, addr, buf, len, false);
2284 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2285 int len, int is_write)
2287 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host instruction cache over the range */
};
2295 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2296 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2298 hwaddr l;
2299 uint8_t *ptr;
2300 hwaddr addr1;
2301 MemoryRegion *mr;
2303 while (len > 0) {
2304 l = len;
2305 mr = address_space_translate(as, addr, &addr1, &l, true);
2307 if (!(memory_region_is_ram(mr) ||
2308 memory_region_is_romd(mr))) {
2309 /* do nothing */
2310 } else {
2311 addr1 += memory_region_get_ram_addr(mr);
2312 /* ROM/RAM case */
2313 ptr = qemu_get_ram_ptr(addr1);
2314 switch (type) {
2315 case WRITE_DATA:
2316 memcpy(ptr, buf, l);
2317 invalidate_and_set_dirty(addr1, l);
2318 break;
2319 case FLUSH_CACHE:
2320 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2321 break;
2324 len -= l;
2325 buf += l;
2326 addr += l;
2330 /* used for ROM loading : can write in RAM and ROM */
2331 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2332 const uint8_t *buf, int len)
2334 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2337 void cpu_flush_icache_range(hwaddr start, int len)
2340 * This function should do the same thing as an icache flush that was
2341 * triggered from within the guest. For TCG we are always cache coherent,
2342 * so there is no need to flush anything. For KVM / Xen we need to flush
2343 * the host's instruction cache at least.
2345 if (tcg_enabled()) {
2346 return;
2349 cpu_physical_memory_write_rom_internal(&address_space_memory,
2350 start, NULL, len, FLUSH_CACHE);
2353 typedef struct {
2354 MemoryRegion *mr;
2355 void *buffer;
2356 hwaddr addr;
2357 hwaddr len;
2358 } BounceBuffer;
2360 static BounceBuffer bounce;
2362 typedef struct MapClient {
2363 void *opaque;
2364 void (*callback)(void *opaque);
2365 QLIST_ENTRY(MapClient) link;
2366 } MapClient;
2368 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2369 = QLIST_HEAD_INITIALIZER(map_client_list);
2371 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2373 MapClient *client = g_malloc(sizeof(*client));
2375 client->opaque = opaque;
2376 client->callback = callback;
2377 QLIST_INSERT_HEAD(&map_client_list, client, link);
2378 return client;
2381 static void cpu_unregister_map_client(void *_client)
2383 MapClient *client = (MapClient *)_client;
2385 QLIST_REMOVE(client, link);
2386 g_free(client);
2389 static void cpu_notify_map_clients(void)
2391 MapClient *client;
2393 while (!QLIST_EMPTY(&map_client_list)) {
2394 client = QLIST_FIRST(&map_client_list);
2395 client->callback(client->opaque);
2396 cpu_unregister_map_client(client);
2400 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2402 MemoryRegion *mr;
2403 hwaddr l, xlat;
2405 while (len > 0) {
2406 l = len;
2407 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2408 if (!memory_access_is_direct(mr, is_write)) {
2409 l = memory_access_size(mr, l, addr);
2410 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2411 return false;
2415 len -= l;
2416 addr += l;
2418 return true;
2421 /* Map a physical memory region into a host virtual address.
2422 * May map a subset of the requested range, given by and returned in *plen.
2423 * May return NULL if resources needed to perform the mapping are exhausted.
2424 * Use only for reads OR writes - not for read-modify-write operations.
2425 * Use cpu_register_map_client() to know when retrying the map operation is
2426 * likely to succeed.
2428 void *address_space_map(AddressSpace *as,
2429 hwaddr addr,
2430 hwaddr *plen,
2431 bool is_write)
2433 hwaddr len = *plen;
2434 hwaddr done = 0;
2435 hwaddr l, xlat, base;
2436 MemoryRegion *mr, *this_mr;
2437 ram_addr_t raddr;
2439 if (len == 0) {
2440 return NULL;
2443 l = len;
2444 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2445 if (!memory_access_is_direct(mr, is_write)) {
2446 if (bounce.buffer) {
2447 return NULL;
2449 /* Avoid unbounded allocations */
2450 l = MIN(l, TARGET_PAGE_SIZE);
2451 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2452 bounce.addr = addr;
2453 bounce.len = l;
2455 memory_region_ref(mr);
2456 bounce.mr = mr;
2457 if (!is_write) {
2458 address_space_read(as, addr, bounce.buffer, l);
2461 *plen = l;
2462 return bounce.buffer;
2465 base = xlat;
2466 raddr = memory_region_get_ram_addr(mr);
2468 for (;;) {
2469 len -= l;
2470 addr += l;
2471 done += l;
2472 if (len == 0) {
2473 break;
2476 l = len;
2477 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2478 if (this_mr != mr || xlat != base + done) {
2479 break;
2483 memory_region_ref(mr);
2484 *plen = done;
2485 return qemu_ram_ptr_length(raddr + base, plen);
2488 /* Unmaps a memory region previously mapped by address_space_map().
2489 * Will also mark the memory as dirty if is_write == 1. access_len gives
2490 * the amount of memory that was actually read or written by the caller.
2492 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2493 int is_write, hwaddr access_len)
2495 if (buffer != bounce.buffer) {
2496 MemoryRegion *mr;
2497 ram_addr_t addr1;
2499 mr = qemu_ram_addr_from_host(buffer, &addr1);
2500 assert(mr != NULL);
2501 if (is_write) {
2502 invalidate_and_set_dirty(addr1, access_len);
2504 if (xen_enabled()) {
2505 xen_invalidate_map_cache_entry(buffer);
2507 memory_region_unref(mr);
2508 return;
2510 if (is_write) {
2511 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2513 qemu_vfree(bounce.buffer);
2514 bounce.buffer = NULL;
2515 memory_region_unref(bounce.mr);
2516 cpu_notify_map_clients();
2519 void *cpu_physical_memory_map(hwaddr addr,
2520 hwaddr *plen,
2521 int is_write)
2523 return address_space_map(&address_space_memory, addr, plen, is_write);
2526 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2527 int is_write, hwaddr access_len)
2529 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2532 /* warning: addr must be aligned */
2533 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2534 enum device_endian endian)
2536 uint8_t *ptr;
2537 uint64_t val;
2538 MemoryRegion *mr;
2539 hwaddr l = 4;
2540 hwaddr addr1;
2542 mr = address_space_translate(as, addr, &addr1, &l, false);
2543 if (l < 4 || !memory_access_is_direct(mr, false)) {
2544 /* I/O case */
2545 io_mem_read(mr, addr1, &val, 4);
2546 #if defined(TARGET_WORDS_BIGENDIAN)
2547 if (endian == DEVICE_LITTLE_ENDIAN) {
2548 val = bswap32(val);
2550 #else
2551 if (endian == DEVICE_BIG_ENDIAN) {
2552 val = bswap32(val);
2554 #endif
2555 } else {
2556 /* RAM case */
2557 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2558 & TARGET_PAGE_MASK)
2559 + addr1);
2560 switch (endian) {
2561 case DEVICE_LITTLE_ENDIAN:
2562 val = ldl_le_p(ptr);
2563 break;
2564 case DEVICE_BIG_ENDIAN:
2565 val = ldl_be_p(ptr);
2566 break;
2567 default:
2568 val = ldl_p(ptr);
2569 break;
2572 return val;
2575 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2577 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2580 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2582 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2585 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2587 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2590 /* warning: addr must be aligned */
2591 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2592 enum device_endian endian)
2594 uint8_t *ptr;
2595 uint64_t val;
2596 MemoryRegion *mr;
2597 hwaddr l = 8;
2598 hwaddr addr1;
2600 mr = address_space_translate(as, addr, &addr1, &l,
2601 false);
2602 if (l < 8 || !memory_access_is_direct(mr, false)) {
2603 /* I/O case */
2604 io_mem_read(mr, addr1, &val, 8);
2605 #if defined(TARGET_WORDS_BIGENDIAN)
2606 if (endian == DEVICE_LITTLE_ENDIAN) {
2607 val = bswap64(val);
2609 #else
2610 if (endian == DEVICE_BIG_ENDIAN) {
2611 val = bswap64(val);
2613 #endif
2614 } else {
2615 /* RAM case */
2616 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2617 & TARGET_PAGE_MASK)
2618 + addr1);
2619 switch (endian) {
2620 case DEVICE_LITTLE_ENDIAN:
2621 val = ldq_le_p(ptr);
2622 break;
2623 case DEVICE_BIG_ENDIAN:
2624 val = ldq_be_p(ptr);
2625 break;
2626 default:
2627 val = ldq_p(ptr);
2628 break;
2631 return val;
2634 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2636 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2639 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2641 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2644 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2646 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2649 /* XXX: optimize */
2650 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2652 uint8_t val;
2653 address_space_rw(as, addr, &val, 1, 0);
2654 return val;
2657 /* warning: addr must be aligned */
2658 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2659 enum device_endian endian)
2661 uint8_t *ptr;
2662 uint64_t val;
2663 MemoryRegion *mr;
2664 hwaddr l = 2;
2665 hwaddr addr1;
2667 mr = address_space_translate(as, addr, &addr1, &l,
2668 false);
2669 if (l < 2 || !memory_access_is_direct(mr, false)) {
2670 /* I/O case */
2671 io_mem_read(mr, addr1, &val, 2);
2672 #if defined(TARGET_WORDS_BIGENDIAN)
2673 if (endian == DEVICE_LITTLE_ENDIAN) {
2674 val = bswap16(val);
2676 #else
2677 if (endian == DEVICE_BIG_ENDIAN) {
2678 val = bswap16(val);
2680 #endif
2681 } else {
2682 /* RAM case */
2683 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2684 & TARGET_PAGE_MASK)
2685 + addr1);
2686 switch (endian) {
2687 case DEVICE_LITTLE_ENDIAN:
2688 val = lduw_le_p(ptr);
2689 break;
2690 case DEVICE_BIG_ENDIAN:
2691 val = lduw_be_p(ptr);
2692 break;
2693 default:
2694 val = lduw_p(ptr);
2695 break;
2698 return val;
2701 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2703 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2706 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2708 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2711 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2713 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2716 /* warning: addr must be aligned. The ram page is not masked as dirty
2717 and the code inside is not invalidated. It is useful if the dirty
2718 bits are used to track modified PTEs */
2719 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2721 uint8_t *ptr;
2722 MemoryRegion *mr;
2723 hwaddr l = 4;
2724 hwaddr addr1;
2726 mr = address_space_translate(as, addr, &addr1, &l,
2727 true);
2728 if (l < 4 || !memory_access_is_direct(mr, true)) {
2729 io_mem_write(mr, addr1, val, 4);
2730 } else {
2731 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2732 ptr = qemu_get_ram_ptr(addr1);
2733 stl_p(ptr, val);
2735 if (unlikely(in_migration)) {
2736 if (cpu_physical_memory_is_clean(addr1)) {
2737 /* invalidate code */
2738 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2739 /* set dirty bit */
2740 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2746 /* warning: addr must be aligned */
2747 static inline void stl_phys_internal(AddressSpace *as,
2748 hwaddr addr, uint32_t val,
2749 enum device_endian endian)
2751 uint8_t *ptr;
2752 MemoryRegion *mr;
2753 hwaddr l = 4;
2754 hwaddr addr1;
2756 mr = address_space_translate(as, addr, &addr1, &l,
2757 true);
2758 if (l < 4 || !memory_access_is_direct(mr, true)) {
2759 #if defined(TARGET_WORDS_BIGENDIAN)
2760 if (endian == DEVICE_LITTLE_ENDIAN) {
2761 val = bswap32(val);
2763 #else
2764 if (endian == DEVICE_BIG_ENDIAN) {
2765 val = bswap32(val);
2767 #endif
2768 io_mem_write(mr, addr1, val, 4);
2769 } else {
2770 /* RAM case */
2771 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2772 ptr = qemu_get_ram_ptr(addr1);
2773 switch (endian) {
2774 case DEVICE_LITTLE_ENDIAN:
2775 stl_le_p(ptr, val);
2776 break;
2777 case DEVICE_BIG_ENDIAN:
2778 stl_be_p(ptr, val);
2779 break;
2780 default:
2781 stl_p(ptr, val);
2782 break;
2784 invalidate_and_set_dirty(addr1, 4);
2788 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2790 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2793 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2795 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2798 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2800 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2803 /* XXX: optimize */
2804 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2806 uint8_t v = val;
2807 address_space_rw(as, addr, &v, 1, 1);
2810 /* warning: addr must be aligned */
2811 static inline void stw_phys_internal(AddressSpace *as,
2812 hwaddr addr, uint32_t val,
2813 enum device_endian endian)
2815 uint8_t *ptr;
2816 MemoryRegion *mr;
2817 hwaddr l = 2;
2818 hwaddr addr1;
2820 mr = address_space_translate(as, addr, &addr1, &l, true);
2821 if (l < 2 || !memory_access_is_direct(mr, true)) {
2822 #if defined(TARGET_WORDS_BIGENDIAN)
2823 if (endian == DEVICE_LITTLE_ENDIAN) {
2824 val = bswap16(val);
2826 #else
2827 if (endian == DEVICE_BIG_ENDIAN) {
2828 val = bswap16(val);
2830 #endif
2831 io_mem_write(mr, addr1, val, 2);
2832 } else {
2833 /* RAM case */
2834 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2835 ptr = qemu_get_ram_ptr(addr1);
2836 switch (endian) {
2837 case DEVICE_LITTLE_ENDIAN:
2838 stw_le_p(ptr, val);
2839 break;
2840 case DEVICE_BIG_ENDIAN:
2841 stw_be_p(ptr, val);
2842 break;
2843 default:
2844 stw_p(ptr, val);
2845 break;
2847 invalidate_and_set_dirty(addr1, 2);
2851 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2853 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2856 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2858 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2861 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2863 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2866 /* XXX: optimize */
2867 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2869 val = tswap64(val);
2870 address_space_rw(as, addr, (void *) &val, 8, 1);
2873 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2875 val = cpu_to_le64(val);
2876 address_space_rw(as, addr, (void *) &val, 8, 1);
2879 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2881 val = cpu_to_be64(val);
2882 address_space_rw(as, addr, (void *) &val, 8, 1);
2885 /* virtual memory access for debug (includes writing to ROM) */
2886 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2887 uint8_t *buf, int len, int is_write)
2889 int l;
2890 hwaddr phys_addr;
2891 target_ulong page;
2893 while (len > 0) {
2894 page = addr & TARGET_PAGE_MASK;
2895 phys_addr = cpu_get_phys_page_debug(cpu, page);
2896 /* if no physical page mapped, return an error */
2897 if (phys_addr == -1)
2898 return -1;
2899 l = (page + TARGET_PAGE_SIZE) - addr;
2900 if (l > len)
2901 l = len;
2902 phys_addr += (addr & ~TARGET_PAGE_MASK);
2903 if (is_write) {
2904 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2905 } else {
2906 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2908 len -= l;
2909 buf += l;
2910 addr += l;
2912 return 0;
2914 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true exactly when TARGET_WORDS_BIGENDIAN is defined at build time.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
2930 #ifndef CONFIG_USER_ONLY
2931 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2933 MemoryRegion*mr;
2934 hwaddr l = 1;
2936 mr = address_space_translate(&address_space_memory,
2937 phys_addr, &phys_addr, &l, false);
2939 return !(memory_region_is_ram(mr) ||
2940 memory_region_is_romd(mr));
2943 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2945 RAMBlock *block;
2947 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2948 func(block->host, block->offset, block->used_length, opaque);
2951 #endif