exec: qemu_ram_alloc_resizeable, qemu_ram_resize
[qemu/rayw.git] / exec.c
blob58ac6d6630ebad958c7da73be4f2bdd0b4336f70
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
78 /* Only a portion of RAM (used_length) is actually used, and migrated.
79 * This used_length size can change across reboots.
81 #define RAM_RESIZEABLE (1 << 2)
83 #endif
85 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
86 /* current CPU in the current thread. It is only valid inside
87 cpu_exec() */
88 DEFINE_TLS(CPUState *, current_cpu);
89 /* 0 = Do not count executed instructions.
90 1 = Precise instruction counting.
91 2 = Adaptive rate instruction counting. */
92 int use_icount;
94 #if !defined(CONFIG_USER_ONLY)
96 typedef struct PhysPageEntry PhysPageEntry;
/* One slot of the multi-level physical page map, packed into 32 bits. */
struct PhysPageEntry {
    /* How many bits to skip to reach the next level (in units of L2_SIZE).
     * A value of 0 marks a leaf.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Non-leaf (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
105 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
107 /* Size of the L2 (and L3, etc) page tables. */
108 #define ADDR_SPACE_BITS 64
110 #define P_L2_BITS 9
111 #define P_L2_SIZE (1 << P_L2_BITS)
113 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
115 typedef PhysPageEntry Node[P_L2_SIZE];
117 typedef struct PhysPageMap {
118 unsigned sections_nb;
119 unsigned sections_nb_alloc;
120 unsigned nodes_nb;
121 unsigned nodes_nb_alloc;
122 Node *nodes;
123 MemoryRegionSection *sections;
124 } PhysPageMap;
126 struct AddressSpaceDispatch {
127 /* This is a multi-level map on the physical address space.
128 * The bottom level has pointers to MemoryRegionSections.
130 PhysPageEntry phys_map;
131 PhysPageMap map;
132 AddressSpace *as;
135 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
136 typedef struct subpage_t {
137 MemoryRegion iomem;
138 AddressSpace *as;
139 hwaddr base;
140 uint16_t sub_section[TARGET_PAGE_SIZE];
141 } subpage_t;
143 #define PHYS_SECTION_UNASSIGNED 0
144 #define PHYS_SECTION_NOTDIRTY 1
145 #define PHYS_SECTION_ROM 2
146 #define PHYS_SECTION_WATCH 3
148 static void io_mem_init(void);
149 static void memory_map_init(void);
150 static void tcg_commit(MemoryListener *listener);
152 static MemoryRegion io_mem_watch;
153 #endif
155 #if !defined(CONFIG_USER_ONLY)
157 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
159 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
160 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
161 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
162 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
166 static uint32_t phys_map_node_alloc(PhysPageMap *map)
168 unsigned i;
169 uint32_t ret;
171 ret = map->nodes_nb++;
172 assert(ret != PHYS_MAP_NODE_NIL);
173 assert(ret != map->nodes_nb_alloc);
174 for (i = 0; i < P_L2_SIZE; ++i) {
175 map->nodes[ret][i].skip = 1;
176 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
178 return ret;
181 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
182 hwaddr *index, hwaddr *nb, uint16_t leaf,
183 int level)
185 PhysPageEntry *p;
186 int i;
187 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
189 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
190 lp->ptr = phys_map_node_alloc(map);
191 p = map->nodes[lp->ptr];
192 if (level == 0) {
193 for (i = 0; i < P_L2_SIZE; i++) {
194 p[i].skip = 0;
195 p[i].ptr = PHYS_SECTION_UNASSIGNED;
198 } else {
199 p = map->nodes[lp->ptr];
201 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
203 while (*nb && lp < &p[P_L2_SIZE]) {
204 if ((*index & (step - 1)) == 0 && *nb >= step) {
205 lp->skip = 0;
206 lp->ptr = leaf;
207 *index += step;
208 *nb -= step;
209 } else {
210 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
212 ++lp;
216 static void phys_page_set(AddressSpaceDispatch *d,
217 hwaddr index, hwaddr nb,
218 uint16_t leaf)
220 /* Wildly overreserve - it doesn't matter much. */
221 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
223 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
226 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
227 * and update our entry so we can skip it and go directly to the destination.
229 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
231 unsigned valid_ptr = P_L2_SIZE;
232 int valid = 0;
233 PhysPageEntry *p;
234 int i;
236 if (lp->ptr == PHYS_MAP_NODE_NIL) {
237 return;
240 p = nodes[lp->ptr];
241 for (i = 0; i < P_L2_SIZE; i++) {
242 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
243 continue;
246 valid_ptr = i;
247 valid++;
248 if (p[i].skip) {
249 phys_page_compact(&p[i], nodes, compacted);
253 /* We can only compress if there's only one child. */
254 if (valid != 1) {
255 return;
258 assert(valid_ptr < P_L2_SIZE);
260 /* Don't compress if it won't fit in the # of bits we have. */
261 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
262 return;
265 lp->ptr = p[valid_ptr].ptr;
266 if (!p[valid_ptr].skip) {
267 /* If our only child is a leaf, make this a leaf. */
268 /* By design, we should have made this node a leaf to begin with so we
269 * should never reach here.
270 * But since it's so simple to handle this, let's do it just in case we
271 * change this rule.
273 lp->skip = 0;
274 } else {
275 lp->skip += p[valid_ptr].skip;
279 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
281 DECLARE_BITMAP(compacted, nodes_nb);
283 if (d->phys_map.skip) {
284 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
288 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
289 Node *nodes, MemoryRegionSection *sections)
291 PhysPageEntry *p;
292 hwaddr index = addr >> TARGET_PAGE_BITS;
293 int i;
295 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
296 if (lp.ptr == PHYS_MAP_NODE_NIL) {
297 return &sections[PHYS_SECTION_UNASSIGNED];
299 p = nodes[lp.ptr];
300 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
303 if (sections[lp.ptr].size.hi ||
304 range_covers_byte(sections[lp.ptr].offset_within_address_space,
305 sections[lp.ptr].size.lo, addr)) {
306 return &sections[lp.ptr];
307 } else {
308 return &sections[PHYS_SECTION_UNASSIGNED];
312 bool memory_region_is_unassigned(MemoryRegion *mr)
314 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
315 && mr != &io_mem_watch;
318 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
319 hwaddr addr,
320 bool resolve_subpage)
322 MemoryRegionSection *section;
323 subpage_t *subpage;
325 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
326 if (resolve_subpage && section->mr->subpage) {
327 subpage = container_of(section->mr, subpage_t, iomem);
328 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
330 return section;
333 static MemoryRegionSection *
334 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
335 hwaddr *plen, bool resolve_subpage)
337 MemoryRegionSection *section;
338 Int128 diff;
340 section = address_space_lookup_region(d, addr, resolve_subpage);
341 /* Compute offset within MemoryRegionSection */
342 addr -= section->offset_within_address_space;
344 /* Compute offset within MemoryRegion */
345 *xlat = addr + section->offset_within_region;
347 diff = int128_sub(section->mr->size, int128_make64(addr));
348 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
349 return section;
352 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
354 if (memory_region_is_ram(mr)) {
355 return !(is_write && mr->readonly);
357 if (memory_region_is_romd(mr)) {
358 return !is_write;
361 return false;
364 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
365 hwaddr *xlat, hwaddr *plen,
366 bool is_write)
368 IOMMUTLBEntry iotlb;
369 MemoryRegionSection *section;
370 MemoryRegion *mr;
371 hwaddr len = *plen;
373 for (;;) {
374 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
375 mr = section->mr;
377 if (!mr->iommu_ops) {
378 break;
381 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
382 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
383 | (addr & iotlb.addr_mask));
384 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
385 if (!(iotlb.perm & (1 << is_write))) {
386 mr = &io_mem_unassigned;
387 break;
390 as = iotlb.target_as;
393 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
394 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
395 len = MIN(page, len);
398 *plen = len;
399 *xlat = addr;
400 return mr;
403 MemoryRegionSection *
404 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
405 hwaddr *plen)
407 MemoryRegionSection *section;
408 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
410 assert(!section->mr->iommu_ops);
411 return section;
413 #endif
415 void cpu_exec_init_all(void)
417 #if !defined(CONFIG_USER_ONLY)
418 qemu_mutex_init(&ram_list.mutex);
419 memory_map_init();
420 io_mem_init();
421 #endif
424 #if !defined(CONFIG_USER_ONLY)
426 static int cpu_common_post_load(void *opaque, int version_id)
428 CPUState *cpu = opaque;
430 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
431 version_id is increased. */
432 cpu->interrupt_request &= ~0x01;
433 tlb_flush(cpu, 1);
435 return 0;
438 static int cpu_common_pre_load(void *opaque)
440 CPUState *cpu = opaque;
442 cpu->exception_index = 0;
444 return 0;
447 static bool cpu_common_exception_index_needed(void *opaque)
449 CPUState *cpu = opaque;
451 return cpu->exception_index != 0;
454 static const VMStateDescription vmstate_cpu_common_exception_index = {
455 .name = "cpu_common/exception_index",
456 .version_id = 1,
457 .minimum_version_id = 1,
458 .fields = (VMStateField[]) {
459 VMSTATE_INT32(exception_index, CPUState),
460 VMSTATE_END_OF_LIST()
464 const VMStateDescription vmstate_cpu_common = {
465 .name = "cpu_common",
466 .version_id = 1,
467 .minimum_version_id = 1,
468 .pre_load = cpu_common_pre_load,
469 .post_load = cpu_common_post_load,
470 .fields = (VMStateField[]) {
471 VMSTATE_UINT32(halted, CPUState),
472 VMSTATE_UINT32(interrupt_request, CPUState),
473 VMSTATE_END_OF_LIST()
475 .subsections = (VMStateSubsection[]) {
477 .vmsd = &vmstate_cpu_common_exception_index,
478 .needed = cpu_common_exception_index_needed,
479 } , {
480 /* empty */
485 #endif
487 CPUState *qemu_get_cpu(int index)
489 CPUState *cpu;
491 CPU_FOREACH(cpu) {
492 if (cpu->cpu_index == index) {
493 return cpu;
497 return NULL;
500 #if !defined(CONFIG_USER_ONLY)
501 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
503 /* We only support one address space per cpu at the moment. */
504 assert(cpu->as == as);
506 if (cpu->tcg_as_listener) {
507 memory_listener_unregister(cpu->tcg_as_listener);
508 } else {
509 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
511 cpu->tcg_as_listener->commit = tcg_commit;
512 memory_listener_register(cpu->tcg_as_listener, as);
514 #endif
516 void cpu_exec_init(CPUArchState *env)
518 CPUState *cpu = ENV_GET_CPU(env);
519 CPUClass *cc = CPU_GET_CLASS(cpu);
520 CPUState *some_cpu;
521 int cpu_index;
523 #if defined(CONFIG_USER_ONLY)
524 cpu_list_lock();
525 #endif
526 cpu_index = 0;
527 CPU_FOREACH(some_cpu) {
528 cpu_index++;
530 cpu->cpu_index = cpu_index;
531 cpu->numa_node = 0;
532 QTAILQ_INIT(&cpu->breakpoints);
533 QTAILQ_INIT(&cpu->watchpoints);
534 #ifndef CONFIG_USER_ONLY
535 cpu->as = &address_space_memory;
536 cpu->thread_id = qemu_get_thread_id();
537 #endif
538 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
539 #if defined(CONFIG_USER_ONLY)
540 cpu_list_unlock();
541 #endif
542 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
543 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
545 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
546 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
547 cpu_save, cpu_load, env);
548 assert(cc->vmsd == NULL);
549 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
550 #endif
551 if (cc->vmsd != NULL) {
552 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
556 #if defined(TARGET_HAS_ICE)
557 #if defined(CONFIG_USER_ONLY)
558 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
560 tb_invalidate_phys_page_range(pc, pc + 1, 0);
562 #else
563 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
565 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
566 if (phys != -1) {
567 tb_invalidate_phys_addr(cpu->as,
568 phys | (pc & ~TARGET_PAGE_MASK));
571 #endif
572 #endif /* TARGET_HAS_ICE */
574 #if defined(CONFIG_USER_ONLY)
575 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
580 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
581 int flags)
583 return -ENOSYS;
586 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
590 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
591 int flags, CPUWatchpoint **watchpoint)
593 return -ENOSYS;
595 #else
596 /* Add a watchpoint. */
597 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
598 int flags, CPUWatchpoint **watchpoint)
600 CPUWatchpoint *wp;
602 /* forbid ranges which are empty or run off the end of the address space */
603 if (len == 0 || (addr + len - 1) < addr) {
604 error_report("tried to set invalid watchpoint at %"
605 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
606 return -EINVAL;
608 wp = g_malloc(sizeof(*wp));
610 wp->vaddr = addr;
611 wp->len = len;
612 wp->flags = flags;
614 /* keep all GDB-injected watchpoints in front */
615 if (flags & BP_GDB) {
616 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
617 } else {
618 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
621 tlb_flush_page(cpu, addr);
623 if (watchpoint)
624 *watchpoint = wp;
625 return 0;
628 /* Remove a specific watchpoint. */
629 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
630 int flags)
632 CPUWatchpoint *wp;
634 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
635 if (addr == wp->vaddr && len == wp->len
636 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
637 cpu_watchpoint_remove_by_ref(cpu, wp);
638 return 0;
641 return -ENOENT;
644 /* Remove a specific watchpoint by reference. */
645 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
647 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
649 tlb_flush_page(cpu, watchpoint->vaddr);
651 g_free(watchpoint);
654 /* Remove all matching watchpoints. */
655 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
657 CPUWatchpoint *wp, *next;
659 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
660 if (wp->flags & mask) {
661 cpu_watchpoint_remove_by_ref(cpu, wp);
666 /* Return true if this watchpoint address matches the specified
667 * access (ie the address range covered by the watchpoint overlaps
668 * partially or completely with the address range covered by the
669 * access).
671 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
672 vaddr addr,
673 vaddr len)
675 /* We know the lengths are non-zero, but a little caution is
676 * required to avoid errors in the case where the range ends
677 * exactly at the top of the address space and so addr + len
678 * wraps round to zero.
680 vaddr wpend = wp->vaddr + wp->len - 1;
681 vaddr addrend = addr + len - 1;
683 return !(addr > wpend || wp->vaddr > addrend);
686 #endif
688 /* Add a breakpoint. */
689 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
690 CPUBreakpoint **breakpoint)
692 #if defined(TARGET_HAS_ICE)
693 CPUBreakpoint *bp;
695 bp = g_malloc(sizeof(*bp));
697 bp->pc = pc;
698 bp->flags = flags;
700 /* keep all GDB-injected breakpoints in front */
701 if (flags & BP_GDB) {
702 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
703 } else {
704 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
707 breakpoint_invalidate(cpu, pc);
709 if (breakpoint) {
710 *breakpoint = bp;
712 return 0;
713 #else
714 return -ENOSYS;
715 #endif
718 /* Remove a specific breakpoint. */
719 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
721 #if defined(TARGET_HAS_ICE)
722 CPUBreakpoint *bp;
724 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
725 if (bp->pc == pc && bp->flags == flags) {
726 cpu_breakpoint_remove_by_ref(cpu, bp);
727 return 0;
730 return -ENOENT;
731 #else
732 return -ENOSYS;
733 #endif
736 /* Remove a specific breakpoint by reference. */
737 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
739 #if defined(TARGET_HAS_ICE)
740 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
742 breakpoint_invalidate(cpu, breakpoint->pc);
744 g_free(breakpoint);
745 #endif
748 /* Remove all matching breakpoints. */
749 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
751 #if defined(TARGET_HAS_ICE)
752 CPUBreakpoint *bp, *next;
754 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
755 if (bp->flags & mask) {
756 cpu_breakpoint_remove_by_ref(cpu, bp);
759 #endif
762 /* enable or disable single step mode. EXCP_DEBUG is returned by the
763 CPU loop after each instruction */
764 void cpu_single_step(CPUState *cpu, int enabled)
766 #if defined(TARGET_HAS_ICE)
767 if (cpu->singlestep_enabled != enabled) {
768 cpu->singlestep_enabled = enabled;
769 if (kvm_enabled()) {
770 kvm_update_guest_debug(cpu, 0);
771 } else {
772 /* must flush all the translated code to avoid inconsistencies */
773 /* XXX: only flush what is necessary */
774 CPUArchState *env = cpu->env_ptr;
775 tb_flush(env);
778 #endif
781 void cpu_abort(CPUState *cpu, const char *fmt, ...)
783 va_list ap;
784 va_list ap2;
786 va_start(ap, fmt);
787 va_copy(ap2, ap);
788 fprintf(stderr, "qemu: fatal: ");
789 vfprintf(stderr, fmt, ap);
790 fprintf(stderr, "\n");
791 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
792 if (qemu_log_enabled()) {
793 qemu_log("qemu: fatal: ");
794 qemu_log_vprintf(fmt, ap2);
795 qemu_log("\n");
796 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
797 qemu_log_flush();
798 qemu_log_close();
800 va_end(ap2);
801 va_end(ap);
802 #if defined(CONFIG_USER_ONLY)
804 struct sigaction act;
805 sigfillset(&act.sa_mask);
806 act.sa_handler = SIG_DFL;
807 sigaction(SIGABRT, &act, NULL);
809 #endif
810 abort();
813 #if !defined(CONFIG_USER_ONLY)
814 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
816 RAMBlock *block;
818 /* The list is protected by the iothread lock here. */
819 block = ram_list.mru_block;
820 if (block && addr - block->offset < block->max_length) {
821 goto found;
823 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
824 if (addr - block->offset < block->max_length) {
825 goto found;
829 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
830 abort();
832 found:
833 ram_list.mru_block = block;
834 return block;
837 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
839 ram_addr_t start1;
840 RAMBlock *block;
841 ram_addr_t end;
843 end = TARGET_PAGE_ALIGN(start + length);
844 start &= TARGET_PAGE_MASK;
846 block = qemu_get_ram_block(start);
847 assert(block == qemu_get_ram_block(end - 1));
848 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
849 cpu_tlb_reset_dirty_all(start1, length);
852 /* Note: start and end must be within the same ram block. */
853 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
854 unsigned client)
856 if (length == 0)
857 return;
858 cpu_physical_memory_clear_dirty_range_type(start, length, client);
860 if (tcg_enabled()) {
861 tlb_reset_dirty_range_all(start, length);
865 static void cpu_physical_memory_set_dirty_tracking(bool enable)
867 in_migration = enable;
870 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
871 MemoryRegionSection *section,
872 target_ulong vaddr,
873 hwaddr paddr, hwaddr xlat,
874 int prot,
875 target_ulong *address)
877 hwaddr iotlb;
878 CPUWatchpoint *wp;
880 if (memory_region_is_ram(section->mr)) {
881 /* Normal RAM. */
882 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
883 + xlat;
884 if (!section->readonly) {
885 iotlb |= PHYS_SECTION_NOTDIRTY;
886 } else {
887 iotlb |= PHYS_SECTION_ROM;
889 } else {
890 iotlb = section - section->address_space->dispatch->map.sections;
891 iotlb += xlat;
894 /* Make accesses to pages with watchpoints go via the
895 watchpoint trap routines. */
896 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
897 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
898 /* Avoid trapping reads of pages with a write breakpoint. */
899 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
900 iotlb = PHYS_SECTION_WATCH + paddr;
901 *address |= TLB_MMIO;
902 break;
907 return iotlb;
909 #endif /* defined(CONFIG_USER_ONLY) */
911 #if !defined(CONFIG_USER_ONLY)
913 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
914 uint16_t section);
915 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
917 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
918 qemu_anon_ram_alloc;
921 * Set a custom physical guest memory alloator.
922 * Accelerators with unusual needs may need this. Hopefully, we can
923 * get rid of it eventually.
925 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
927 phys_mem_alloc = alloc;
930 static uint16_t phys_section_add(PhysPageMap *map,
931 MemoryRegionSection *section)
933 /* The physical section number is ORed with a page-aligned
934 * pointer to produce the iotlb entries. Thus it should
935 * never overflow into the page-aligned value.
937 assert(map->sections_nb < TARGET_PAGE_SIZE);
939 if (map->sections_nb == map->sections_nb_alloc) {
940 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
941 map->sections = g_renew(MemoryRegionSection, map->sections,
942 map->sections_nb_alloc);
944 map->sections[map->sections_nb] = *section;
945 memory_region_ref(section->mr);
946 return map->sections_nb++;
949 static void phys_section_destroy(MemoryRegion *mr)
951 memory_region_unref(mr);
953 if (mr->subpage) {
954 subpage_t *subpage = container_of(mr, subpage_t, iomem);
955 object_unref(OBJECT(&subpage->iomem));
956 g_free(subpage);
960 static void phys_sections_free(PhysPageMap *map)
962 while (map->sections_nb > 0) {
963 MemoryRegionSection *section = &map->sections[--map->sections_nb];
964 phys_section_destroy(section->mr);
966 g_free(map->sections);
967 g_free(map->nodes);
970 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
972 subpage_t *subpage;
973 hwaddr base = section->offset_within_address_space
974 & TARGET_PAGE_MASK;
975 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
976 d->map.nodes, d->map.sections);
977 MemoryRegionSection subsection = {
978 .offset_within_address_space = base,
979 .size = int128_make64(TARGET_PAGE_SIZE),
981 hwaddr start, end;
983 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
985 if (!(existing->mr->subpage)) {
986 subpage = subpage_init(d->as, base);
987 subsection.address_space = d->as;
988 subsection.mr = &subpage->iomem;
989 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
990 phys_section_add(&d->map, &subsection));
991 } else {
992 subpage = container_of(existing->mr, subpage_t, iomem);
994 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
995 end = start + int128_get64(section->size) - 1;
996 subpage_register(subpage, start, end,
997 phys_section_add(&d->map, section));
1001 static void register_multipage(AddressSpaceDispatch *d,
1002 MemoryRegionSection *section)
1004 hwaddr start_addr = section->offset_within_address_space;
1005 uint16_t section_index = phys_section_add(&d->map, section);
1006 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1007 TARGET_PAGE_BITS));
1009 assert(num_pages);
1010 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1013 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1015 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1016 AddressSpaceDispatch *d = as->next_dispatch;
1017 MemoryRegionSection now = *section, remain = *section;
1018 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1020 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1021 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1022 - now.offset_within_address_space;
1024 now.size = int128_min(int128_make64(left), now.size);
1025 register_subpage(d, &now);
1026 } else {
1027 now.size = int128_zero();
1029 while (int128_ne(remain.size, now.size)) {
1030 remain.size = int128_sub(remain.size, now.size);
1031 remain.offset_within_address_space += int128_get64(now.size);
1032 remain.offset_within_region += int128_get64(now.size);
1033 now = remain;
1034 if (int128_lt(remain.size, page_size)) {
1035 register_subpage(d, &now);
1036 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1037 now.size = page_size;
1038 register_subpage(d, &now);
1039 } else {
1040 now.size = int128_and(now.size, int128_neg(page_size));
1041 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; only does anything when KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1052 void qemu_mutex_lock_ramlist(void)
1054 qemu_mutex_lock(&ram_list.mutex);
1057 void qemu_mutex_unlock_ramlist(void)
1059 qemu_mutex_unlock(&ram_list.mutex);
1062 #ifdef __linux__
1064 #include <sys/vfs.h>
1066 #define HUGETLBFS_MAGIC 0x958458f6
1068 static long gethugepagesize(const char *path, Error **errp)
1070 struct statfs fs;
1071 int ret;
1073 do {
1074 ret = statfs(path, &fs);
1075 } while (ret != 0 && errno == EINTR);
1077 if (ret != 0) {
1078 error_setg_errno(errp, errno, "failed to get page size of file %s",
1079 path);
1080 return 0;
1083 if (fs.f_type != HUGETLBFS_MAGIC)
1084 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1086 return fs.f_bsize;
1089 static void *file_ram_alloc(RAMBlock *block,
1090 ram_addr_t memory,
1091 const char *path,
1092 Error **errp)
1094 char *filename;
1095 char *sanitized_name;
1096 char *c;
1097 void *area = NULL;
1098 int fd;
1099 uint64_t hpagesize;
1100 Error *local_err = NULL;
1102 hpagesize = gethugepagesize(path, &local_err);
1103 if (local_err) {
1104 error_propagate(errp, local_err);
1105 goto error;
1107 block->mr->align = hpagesize;
1109 if (memory < hpagesize) {
1110 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1111 "or larger than huge page size 0x%" PRIx64,
1112 memory, hpagesize);
1113 goto error;
1116 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1117 error_setg(errp,
1118 "host lacks kvm mmu notifiers, -mem-path unsupported");
1119 goto error;
1122 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1123 sanitized_name = g_strdup(memory_region_name(block->mr));
1124 for (c = sanitized_name; *c != '\0'; c++) {
1125 if (*c == '/')
1126 *c = '_';
1129 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1130 sanitized_name);
1131 g_free(sanitized_name);
1133 fd = mkstemp(filename);
1134 if (fd < 0) {
1135 error_setg_errno(errp, errno,
1136 "unable to create backing store for hugepages");
1137 g_free(filename);
1138 goto error;
1140 unlink(filename);
1141 g_free(filename);
1143 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1146 * ftruncate is not supported by hugetlbfs in older
1147 * hosts, so don't bother bailing out on errors.
1148 * If anything goes wrong with it under other filesystems,
1149 * mmap will fail.
1151 if (ftruncate(fd, memory)) {
1152 perror("ftruncate");
1155 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1156 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1157 fd, 0);
1158 if (area == MAP_FAILED) {
1159 error_setg_errno(errp, errno,
1160 "unable to map backing store for hugepages");
1161 close(fd);
1162 goto error;
1165 if (mem_prealloc) {
1166 os_mem_prealloc(fd, area, memory);
1169 block->fd = fd;
1170 return area;
1172 error:
1173 if (mem_prealloc) {
1174 error_report("%s\n", error_get_pretty(*errp));
1175 exit(1);
1177 return NULL;
1179 #endif
1181 static ram_addr_t find_ram_offset(ram_addr_t size)
1183 RAMBlock *block, *next_block;
1184 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1186 assert(size != 0); /* it would hand out same offset multiple times */
1188 if (QTAILQ_EMPTY(&ram_list.blocks))
1189 return 0;
1191 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1192 ram_addr_t end, next = RAM_ADDR_MAX;
1194 end = block->offset + block->max_length;
1196 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1197 if (next_block->offset >= end) {
1198 next = MIN(next, next_block->offset);
1201 if (next - end >= size && next - end < mingap) {
1202 offset = end;
1203 mingap = next - end;
1207 if (offset == RAM_ADDR_MAX) {
1208 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1209 (uint64_t)size);
1210 abort();
1213 return offset;
1216 ram_addr_t last_ram_offset(void)
1218 RAMBlock *block;
1219 ram_addr_t last = 0;
1221 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1222 last = MAX(last, block->offset + block->max_length);
1224 return last;
1227 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1229 int ret;
1231 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1232 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1233 "dump-guest-core", true)) {
1234 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1235 if (ret) {
1236 perror("qemu_madvise");
1237 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1238 "but dump_guest_core=off specified\n");
1243 static RAMBlock *find_ram_block(ram_addr_t addr)
1245 RAMBlock *block;
1247 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1248 if (block->offset == addr) {
1249 return block;
1253 return NULL;
1256 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1258 RAMBlock *new_block = find_ram_block(addr);
1259 RAMBlock *block;
1261 assert(new_block);
1262 assert(!new_block->idstr[0]);
1264 if (dev) {
1265 char *id = qdev_get_dev_path(dev);
1266 if (id) {
1267 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1268 g_free(id);
1271 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1273 /* This assumes the iothread lock is taken here too. */
1274 qemu_mutex_lock_ramlist();
1275 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1276 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1277 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1278 new_block->idstr);
1279 abort();
1282 qemu_mutex_unlock_ramlist();
1285 void qemu_ram_unset_idstr(ram_addr_t addr)
1287 RAMBlock *block = find_ram_block(addr);
1289 if (block) {
1290 memset(block->idstr, 0, sizeof(block->idstr));
1294 static int memory_try_enable_merging(void *addr, size_t len)
1296 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1297 /* disabled by the user */
1298 return 0;
1301 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1304 /* Only legal before guest might have detected the memory size: e.g. on
1305 * incoming migration, or right after reset.
1307 * As memory core doesn't know how is memory accessed, it is up to
1308 * resize callback to update device state and/or add assertions to detect
1309 * misuse, if necessary.
1311 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1313 RAMBlock *block = find_ram_block(base);
1315 assert(block);
1317 if (block->used_length == newsize) {
1318 return 0;
1321 if (!(block->flags & RAM_RESIZEABLE)) {
1322 error_setg_errno(errp, EINVAL,
1323 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1324 " in != 0x" RAM_ADDR_FMT, block->idstr,
1325 newsize, block->used_length);
1326 return -EINVAL;
1329 if (block->max_length < newsize) {
1330 error_setg_errno(errp, EINVAL,
1331 "Length too large: %s: 0x" RAM_ADDR_FMT
1332 " > 0x" RAM_ADDR_FMT, block->idstr,
1333 newsize, block->max_length);
1334 return -EINVAL;
1337 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1338 block->used_length = newsize;
1339 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1340 memory_region_set_size(block->mr, newsize);
1341 if (block->resized) {
1342 block->resized(block->idstr, newsize, block->host);
1344 return 0;
1347 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1349 RAMBlock *block;
1350 ram_addr_t old_ram_size, new_ram_size;
1352 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1354 /* This assumes the iothread lock is taken here too. */
1355 qemu_mutex_lock_ramlist();
1356 new_block->offset = find_ram_offset(new_block->max_length);
1358 if (!new_block->host) {
1359 if (xen_enabled()) {
1360 xen_ram_alloc(new_block->offset, new_block->max_length,
1361 new_block->mr);
1362 } else {
1363 new_block->host = phys_mem_alloc(new_block->max_length,
1364 &new_block->mr->align);
1365 if (!new_block->host) {
1366 error_setg_errno(errp, errno,
1367 "cannot set up guest memory '%s'",
1368 memory_region_name(new_block->mr));
1369 qemu_mutex_unlock_ramlist();
1370 return -1;
1372 memory_try_enable_merging(new_block->host, new_block->max_length);
1376 /* Keep the list sorted from biggest to smallest block. */
1377 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1378 if (block->max_length < new_block->max_length) {
1379 break;
1382 if (block) {
1383 QTAILQ_INSERT_BEFORE(block, new_block, next);
1384 } else {
1385 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1387 ram_list.mru_block = NULL;
1389 ram_list.version++;
1390 qemu_mutex_unlock_ramlist();
1392 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1394 if (new_ram_size > old_ram_size) {
1395 int i;
1396 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1397 ram_list.dirty_memory[i] =
1398 bitmap_zero_extend(ram_list.dirty_memory[i],
1399 old_ram_size, new_ram_size);
1402 cpu_physical_memory_set_dirty_range(new_block->offset,
1403 new_block->used_length);
1405 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1406 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1407 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1409 if (kvm_enabled()) {
1410 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1413 return new_block->offset;
1416 #ifdef __linux__
1417 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1418 bool share, const char *mem_path,
1419 Error **errp)
1421 RAMBlock *new_block;
1422 ram_addr_t addr;
1423 Error *local_err = NULL;
1425 if (xen_enabled()) {
1426 error_setg(errp, "-mem-path not supported with Xen");
1427 return -1;
1430 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1432 * file_ram_alloc() needs to allocate just like
1433 * phys_mem_alloc, but we haven't bothered to provide
1434 * a hook there.
1436 error_setg(errp,
1437 "-mem-path not supported with this accelerator");
1438 return -1;
1441 size = TARGET_PAGE_ALIGN(size);
1442 new_block = g_malloc0(sizeof(*new_block));
1443 new_block->mr = mr;
1444 new_block->used_length = size;
1445 new_block->max_length = size;
1446 new_block->flags = share ? RAM_SHARED : 0;
1447 new_block->host = file_ram_alloc(new_block, size,
1448 mem_path, errp);
1449 if (!new_block->host) {
1450 g_free(new_block);
1451 return -1;
1454 addr = ram_block_add(new_block, &local_err);
1455 if (local_err) {
1456 g_free(new_block);
1457 error_propagate(errp, local_err);
1458 return -1;
1460 return addr;
1462 #endif
1464 static
1465 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1466 void (*resized)(const char*,
1467 uint64_t length,
1468 void *host),
1469 void *host, bool resizeable,
1470 MemoryRegion *mr, Error **errp)
1472 RAMBlock *new_block;
1473 ram_addr_t addr;
1474 Error *local_err = NULL;
1476 size = TARGET_PAGE_ALIGN(size);
1477 max_size = TARGET_PAGE_ALIGN(max_size);
1478 new_block = g_malloc0(sizeof(*new_block));
1479 new_block->mr = mr;
1480 new_block->resized = resized;
1481 new_block->used_length = size;
1482 new_block->max_length = max_size;
1483 assert(max_size >= size);
1484 new_block->fd = -1;
1485 new_block->host = host;
1486 if (host) {
1487 new_block->flags |= RAM_PREALLOC;
1489 if (resizeable) {
1490 new_block->flags |= RAM_RESIZEABLE;
1492 addr = ram_block_add(new_block, &local_err);
1493 if (local_err) {
1494 g_free(new_block);
1495 error_propagate(errp, local_err);
1496 return -1;
1498 return addr;
1501 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1502 MemoryRegion *mr, Error **errp)
1504 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1507 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1509 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1512 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1513 void (*resized)(const char*,
1514 uint64_t length,
1515 void *host),
1516 MemoryRegion *mr, Error **errp)
1518 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1521 void qemu_ram_free_from_ptr(ram_addr_t addr)
1523 RAMBlock *block;
1525 /* This assumes the iothread lock is taken here too. */
1526 qemu_mutex_lock_ramlist();
1527 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1528 if (addr == block->offset) {
1529 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1530 ram_list.mru_block = NULL;
1531 ram_list.version++;
1532 g_free(block);
1533 break;
1536 qemu_mutex_unlock_ramlist();
1539 void qemu_ram_free(ram_addr_t addr)
1541 RAMBlock *block;
1543 /* This assumes the iothread lock is taken here too. */
1544 qemu_mutex_lock_ramlist();
1545 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1546 if (addr == block->offset) {
1547 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1548 ram_list.mru_block = NULL;
1549 ram_list.version++;
1550 if (block->flags & RAM_PREALLOC) {
1552 } else if (xen_enabled()) {
1553 xen_invalidate_map_cache_entry(block->host);
1554 #ifndef _WIN32
1555 } else if (block->fd >= 0) {
1556 munmap(block->host, block->max_length);
1557 close(block->fd);
1558 #endif
1559 } else {
1560 qemu_anon_ram_free(block->host, block->max_length);
1562 g_free(block);
1563 break;
1566 qemu_mutex_unlock_ramlist();
1570 #ifndef _WIN32
1571 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1573 RAMBlock *block;
1574 ram_addr_t offset;
1575 int flags;
1576 void *area, *vaddr;
1578 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1579 offset = addr - block->offset;
1580 if (offset < block->max_length) {
1581 vaddr = ramblock_ptr(block, offset);
1582 if (block->flags & RAM_PREALLOC) {
1584 } else if (xen_enabled()) {
1585 abort();
1586 } else {
1587 flags = MAP_FIXED;
1588 munmap(vaddr, length);
1589 if (block->fd >= 0) {
1590 flags |= (block->flags & RAM_SHARED ?
1591 MAP_SHARED : MAP_PRIVATE);
1592 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1593 flags, block->fd, offset);
1594 } else {
1596 * Remap needs to match alloc. Accelerators that
1597 * set phys_mem_alloc never remap. If they did,
1598 * we'd need a remap hook here.
1600 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1602 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1603 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1604 flags, -1, 0);
1606 if (area != vaddr) {
1607 fprintf(stderr, "Could not remap addr: "
1608 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1609 length, addr);
1610 exit(1);
1612 memory_try_enable_merging(vaddr, length);
1613 qemu_ram_setup_dump(vaddr, length);
1615 return;
1619 #endif /* !_WIN32 */
1621 int qemu_get_ram_fd(ram_addr_t addr)
1623 RAMBlock *block = qemu_get_ram_block(addr);
1625 return block->fd;
1628 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1630 RAMBlock *block = qemu_get_ram_block(addr);
1632 return ramblock_ptr(block, 0);
1635 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1636 With the exception of the softmmu code in this file, this should
1637 only be used for local memory (e.g. video ram) that the device owns,
1638 and knows it isn't going to access beyond the end of the block.
1640 It should not be used for general purpose DMA.
1641 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1643 void *qemu_get_ram_ptr(ram_addr_t addr)
1645 RAMBlock *block = qemu_get_ram_block(addr);
1647 if (xen_enabled()) {
1648 /* We need to check if the requested address is in the RAM
1649 * because we don't want to map the entire memory in QEMU.
1650 * In that case just map until the end of the page.
1652 if (block->offset == 0) {
1653 return xen_map_cache(addr, 0, 0);
1654 } else if (block->host == NULL) {
1655 block->host =
1656 xen_map_cache(block->offset, block->max_length, 1);
1659 return ramblock_ptr(block, addr - block->offset);
1662 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1663 * but takes a size argument */
1664 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1666 if (*size == 0) {
1667 return NULL;
1669 if (xen_enabled()) {
1670 return xen_map_cache(addr, *size, 1);
1671 } else {
1672 RAMBlock *block;
1674 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1675 if (addr - block->offset < block->max_length) {
1676 if (addr - block->offset + *size > block->max_length)
1677 *size = block->max_length - addr + block->offset;
1678 return ramblock_ptr(block, addr - block->offset);
1682 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1683 abort();
1687 /* Some of the softmmu routines need to translate from a host pointer
1688 (typically a TLB entry) back to a ram offset. */
1689 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1691 RAMBlock *block;
1692 uint8_t *host = ptr;
1694 if (xen_enabled()) {
1695 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1696 return qemu_get_ram_block(*ram_addr)->mr;
1699 block = ram_list.mru_block;
1700 if (block && block->host && host - block->host < block->max_length) {
1701 goto found;
1704 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1705 /* This case append when the block is not mapped. */
1706 if (block->host == NULL) {
1707 continue;
1709 if (host - block->host < block->max_length) {
1710 goto found;
1714 return NULL;
1716 found:
1717 *ram_addr = block->offset + (host - block->host);
1718 return block->mr;
1721 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1722 uint64_t val, unsigned size)
1724 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1725 tb_invalidate_phys_page_fast(ram_addr, size);
1727 switch (size) {
1728 case 1:
1729 stb_p(qemu_get_ram_ptr(ram_addr), val);
1730 break;
1731 case 2:
1732 stw_p(qemu_get_ram_ptr(ram_addr), val);
1733 break;
1734 case 4:
1735 stl_p(qemu_get_ram_ptr(ram_addr), val);
1736 break;
1737 default:
1738 abort();
1740 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1741 /* we remove the notdirty callback only if the code has been
1742 flushed */
1743 if (!cpu_physical_memory_is_clean(ram_addr)) {
1744 CPUArchState *env = current_cpu->env_ptr;
1745 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1749 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1750 unsigned size, bool is_write)
1752 return is_write;
1755 static const MemoryRegionOps notdirty_mem_ops = {
1756 .write = notdirty_mem_write,
1757 .valid.accepts = notdirty_mem_accepts,
1758 .endianness = DEVICE_NATIVE_ENDIAN,
1761 /* Generate a debug exception if a watchpoint has been hit. */
1762 static void check_watchpoint(int offset, int len, int flags)
1764 CPUState *cpu = current_cpu;
1765 CPUArchState *env = cpu->env_ptr;
1766 target_ulong pc, cs_base;
1767 target_ulong vaddr;
1768 CPUWatchpoint *wp;
1769 int cpu_flags;
1771 if (cpu->watchpoint_hit) {
1772 /* We re-entered the check after replacing the TB. Now raise
1773 * the debug interrupt so that is will trigger after the
1774 * current instruction. */
1775 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1776 return;
1778 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1779 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1780 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1781 && (wp->flags & flags)) {
1782 if (flags == BP_MEM_READ) {
1783 wp->flags |= BP_WATCHPOINT_HIT_READ;
1784 } else {
1785 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1787 wp->hitaddr = vaddr;
1788 if (!cpu->watchpoint_hit) {
1789 cpu->watchpoint_hit = wp;
1790 tb_check_watchpoint(cpu);
1791 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1792 cpu->exception_index = EXCP_DEBUG;
1793 cpu_loop_exit(cpu);
1794 } else {
1795 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1796 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1797 cpu_resume_from_signal(cpu, NULL);
1800 } else {
1801 wp->flags &= ~BP_WATCHPOINT_HIT;
1806 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1807 so these check for a hit then pass through to the normal out-of-line
1808 phys routines. */
1809 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1810 unsigned size)
1812 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1813 switch (size) {
1814 case 1: return ldub_phys(&address_space_memory, addr);
1815 case 2: return lduw_phys(&address_space_memory, addr);
1816 case 4: return ldl_phys(&address_space_memory, addr);
1817 default: abort();
1821 static void watch_mem_write(void *opaque, hwaddr addr,
1822 uint64_t val, unsigned size)
1824 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1825 switch (size) {
1826 case 1:
1827 stb_phys(&address_space_memory, addr, val);
1828 break;
1829 case 2:
1830 stw_phys(&address_space_memory, addr, val);
1831 break;
1832 case 4:
1833 stl_phys(&address_space_memory, addr, val);
1834 break;
1835 default: abort();
1839 static const MemoryRegionOps watch_mem_ops = {
1840 .read = watch_mem_read,
1841 .write = watch_mem_write,
1842 .endianness = DEVICE_NATIVE_ENDIAN,
1845 static uint64_t subpage_read(void *opaque, hwaddr addr,
1846 unsigned len)
1848 subpage_t *subpage = opaque;
1849 uint8_t buf[4];
1851 #if defined(DEBUG_SUBPAGE)
1852 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1853 subpage, len, addr);
1854 #endif
1855 address_space_read(subpage->as, addr + subpage->base, buf, len);
1856 switch (len) {
1857 case 1:
1858 return ldub_p(buf);
1859 case 2:
1860 return lduw_p(buf);
1861 case 4:
1862 return ldl_p(buf);
1863 default:
1864 abort();
1868 static void subpage_write(void *opaque, hwaddr addr,
1869 uint64_t value, unsigned len)
1871 subpage_t *subpage = opaque;
1872 uint8_t buf[4];
1874 #if defined(DEBUG_SUBPAGE)
1875 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1876 " value %"PRIx64"\n",
1877 __func__, subpage, len, addr, value);
1878 #endif
1879 switch (len) {
1880 case 1:
1881 stb_p(buf, value);
1882 break;
1883 case 2:
1884 stw_p(buf, value);
1885 break;
1886 case 4:
1887 stl_p(buf, value);
1888 break;
1889 default:
1890 abort();
1892 address_space_write(subpage->as, addr + subpage->base, buf, len);
1895 static bool subpage_accepts(void *opaque, hwaddr addr,
1896 unsigned len, bool is_write)
1898 subpage_t *subpage = opaque;
1899 #if defined(DEBUG_SUBPAGE)
1900 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1901 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1902 #endif
1904 return address_space_access_valid(subpage->as, addr + subpage->base,
1905 len, is_write);
1908 static const MemoryRegionOps subpage_ops = {
1909 .read = subpage_read,
1910 .write = subpage_write,
1911 .valid.accepts = subpage_accepts,
1912 .endianness = DEVICE_NATIVE_ENDIAN,
1915 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1916 uint16_t section)
1918 int idx, eidx;
1920 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1921 return -1;
1922 idx = SUBPAGE_IDX(start);
1923 eidx = SUBPAGE_IDX(end);
1924 #if defined(DEBUG_SUBPAGE)
1925 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1926 __func__, mmio, start, end, idx, eidx, section);
1927 #endif
1928 for (; idx <= eidx; idx++) {
1929 mmio->sub_section[idx] = section;
1932 return 0;
1935 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1937 subpage_t *mmio;
1939 mmio = g_malloc0(sizeof(subpage_t));
1941 mmio->as = as;
1942 mmio->base = base;
1943 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1944 NULL, TARGET_PAGE_SIZE);
1945 mmio->iomem.subpage = true;
1946 #if defined(DEBUG_SUBPAGE)
1947 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1948 mmio, base, TARGET_PAGE_SIZE);
1949 #endif
1950 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1952 return mmio;
1955 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1956 MemoryRegion *mr)
1958 assert(as);
1959 MemoryRegionSection section = {
1960 .address_space = as,
1961 .mr = mr,
1962 .offset_within_address_space = 0,
1963 .offset_within_region = 0,
1964 .size = int128_2_64(),
1967 return phys_section_add(map, &section);
1970 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1972 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1975 static void io_mem_init(void)
1977 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1978 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1979 NULL, UINT64_MAX);
1980 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1981 NULL, UINT64_MAX);
1982 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1983 NULL, UINT64_MAX);
1986 static void mem_begin(MemoryListener *listener)
1988 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1989 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1990 uint16_t n;
1992 n = dummy_section(&d->map, as, &io_mem_unassigned);
1993 assert(n == PHYS_SECTION_UNASSIGNED);
1994 n = dummy_section(&d->map, as, &io_mem_notdirty);
1995 assert(n == PHYS_SECTION_NOTDIRTY);
1996 n = dummy_section(&d->map, as, &io_mem_rom);
1997 assert(n == PHYS_SECTION_ROM);
1998 n = dummy_section(&d->map, as, &io_mem_watch);
1999 assert(n == PHYS_SECTION_WATCH);
2001 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2002 d->as = as;
2003 as->next_dispatch = d;
2006 static void mem_commit(MemoryListener *listener)
2008 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2009 AddressSpaceDispatch *cur = as->dispatch;
2010 AddressSpaceDispatch *next = as->next_dispatch;
2012 phys_page_compact_all(next, next->map.nodes_nb);
2014 as->dispatch = next;
2016 if (cur) {
2017 phys_sections_free(&cur->map);
2018 g_free(cur);
2022 static void tcg_commit(MemoryListener *listener)
2024 CPUState *cpu;
2026 /* since each CPU stores ram addresses in its TLB cache, we must
2027 reset the modified entries */
2028 /* XXX: slow ! */
2029 CPU_FOREACH(cpu) {
2030 /* FIXME: Disentangle the cpu.h circular files deps so we can
2031 directly get the right CPU from listener. */
2032 if (cpu->tcg_as_listener != listener) {
2033 continue;
2035 tlb_flush(cpu, 1);
2039 static void core_log_global_start(MemoryListener *listener)
2041 cpu_physical_memory_set_dirty_tracking(true);
2044 static void core_log_global_stop(MemoryListener *listener)
2046 cpu_physical_memory_set_dirty_tracking(false);
2049 static MemoryListener core_memory_listener = {
2050 .log_global_start = core_log_global_start,
2051 .log_global_stop = core_log_global_stop,
2052 .priority = 1,
2055 void address_space_init_dispatch(AddressSpace *as)
2057 as->dispatch = NULL;
2058 as->dispatch_listener = (MemoryListener) {
2059 .begin = mem_begin,
2060 .commit = mem_commit,
2061 .region_add = mem_add,
2062 .region_nop = mem_add,
2063 .priority = 0,
2065 memory_listener_register(&as->dispatch_listener, as);
2068 void address_space_destroy_dispatch(AddressSpace *as)
2070 AddressSpaceDispatch *d = as->dispatch;
2072 memory_listener_unregister(&as->dispatch_listener);
2073 g_free(d);
2074 as->dispatch = NULL;
2077 static void memory_map_init(void)
2079 system_memory = g_malloc(sizeof(*system_memory));
2081 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2082 address_space_init(&address_space_memory, system_memory, "memory");
2084 system_io = g_malloc(sizeof(*system_io));
2085 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2086 65536);
2087 address_space_init(&address_space_io, system_io, "I/O");
2089 memory_listener_register(&core_memory_listener, &address_space_memory);
2092 MemoryRegion *get_system_memory(void)
2094 return system_memory;
2097 MemoryRegion *get_system_io(void)
2099 return system_io;
2102 #endif /* !defined(CONFIG_USER_ONLY) */
2104 /* physical memory access (slow version, mainly for debug) */
2105 #if defined(CONFIG_USER_ONLY)
2106 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2107 uint8_t *buf, int len, int is_write)
2109 int l, flags;
2110 target_ulong page;
2111 void * p;
2113 while (len > 0) {
2114 page = addr & TARGET_PAGE_MASK;
2115 l = (page + TARGET_PAGE_SIZE) - addr;
2116 if (l > len)
2117 l = len;
2118 flags = page_get_flags(page);
2119 if (!(flags & PAGE_VALID))
2120 return -1;
2121 if (is_write) {
2122 if (!(flags & PAGE_WRITE))
2123 return -1;
2124 /* XXX: this code should not depend on lock_user */
2125 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2126 return -1;
2127 memcpy(p, buf, l);
2128 unlock_user(p, addr, l);
2129 } else {
2130 if (!(flags & PAGE_READ))
2131 return -1;
2132 /* XXX: this code should not depend on lock_user */
2133 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2134 return -1;
2135 memcpy(buf, p, l);
2136 unlock_user(p, addr, 0);
2138 len -= l;
2139 buf += l;
2140 addr += l;
2142 return 0;
2145 #else
2147 static void invalidate_and_set_dirty(hwaddr addr,
2148 hwaddr length)
2150 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2151 tb_invalidate_phys_range(addr, addr + length, 0);
2152 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2154 xen_modified_memory(addr, length);
2157 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2159 unsigned access_size_max = mr->ops->valid.max_access_size;
2161 /* Regions are assumed to support 1-4 byte accesses unless
2162 otherwise specified. */
2163 if (access_size_max == 0) {
2164 access_size_max = 4;
2167 /* Bound the maximum access by the alignment of the address. */
2168 if (!mr->ops->impl.unaligned) {
2169 unsigned align_size_max = addr & -addr;
2170 if (align_size_max != 0 && align_size_max < access_size_max) {
2171 access_size_max = align_size_max;
2175 /* Don't attempt accesses larger than the maximum. */
2176 if (l > access_size_max) {
2177 l = access_size_max;
2179 if (l & (l - 1)) {
2180 l = 1 << (qemu_fls(l) - 1);
2183 return l;
2186 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2187 int len, bool is_write)
2189 hwaddr l;
2190 uint8_t *ptr;
2191 uint64_t val;
2192 hwaddr addr1;
2193 MemoryRegion *mr;
2194 bool error = false;
2196 while (len > 0) {
2197 l = len;
2198 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2200 if (is_write) {
2201 if (!memory_access_is_direct(mr, is_write)) {
2202 l = memory_access_size(mr, l, addr1);
2203 /* XXX: could force current_cpu to NULL to avoid
2204 potential bugs */
2205 switch (l) {
2206 case 8:
2207 /* 64 bit write access */
2208 val = ldq_p(buf);
2209 error |= io_mem_write(mr, addr1, val, 8);
2210 break;
2211 case 4:
2212 /* 32 bit write access */
2213 val = ldl_p(buf);
2214 error |= io_mem_write(mr, addr1, val, 4);
2215 break;
2216 case 2:
2217 /* 16 bit write access */
2218 val = lduw_p(buf);
2219 error |= io_mem_write(mr, addr1, val, 2);
2220 break;
2221 case 1:
2222 /* 8 bit write access */
2223 val = ldub_p(buf);
2224 error |= io_mem_write(mr, addr1, val, 1);
2225 break;
2226 default:
2227 abort();
2229 } else {
2230 addr1 += memory_region_get_ram_addr(mr);
2231 /* RAM case */
2232 ptr = qemu_get_ram_ptr(addr1);
2233 memcpy(ptr, buf, l);
2234 invalidate_and_set_dirty(addr1, l);
2236 } else {
2237 if (!memory_access_is_direct(mr, is_write)) {
2238 /* I/O case */
2239 l = memory_access_size(mr, l, addr1);
2240 switch (l) {
2241 case 8:
2242 /* 64 bit read access */
2243 error |= io_mem_read(mr, addr1, &val, 8);
2244 stq_p(buf, val);
2245 break;
2246 case 4:
2247 /* 32 bit read access */
2248 error |= io_mem_read(mr, addr1, &val, 4);
2249 stl_p(buf, val);
2250 break;
2251 case 2:
2252 /* 16 bit read access */
2253 error |= io_mem_read(mr, addr1, &val, 2);
2254 stw_p(buf, val);
2255 break;
2256 case 1:
2257 /* 8 bit read access */
2258 error |= io_mem_read(mr, addr1, &val, 1);
2259 stb_p(buf, val);
2260 break;
2261 default:
2262 abort();
2264 } else {
2265 /* RAM case */
2266 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2267 memcpy(buf, ptr, l);
2270 len -= l;
2271 buf += l;
2272 addr += l;
2275 return error;
2278 bool address_space_write(AddressSpace *as, hwaddr addr,
2279 const uint8_t *buf, int len)
2281 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2284 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2286 return address_space_rw(as, addr, buf, len, false);
2290 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2291 int len, int is_write)
2293 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2301 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2302 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2304 hwaddr l;
2305 uint8_t *ptr;
2306 hwaddr addr1;
2307 MemoryRegion *mr;
2309 while (len > 0) {
2310 l = len;
2311 mr = address_space_translate(as, addr, &addr1, &l, true);
2313 if (!(memory_region_is_ram(mr) ||
2314 memory_region_is_romd(mr))) {
2315 /* do nothing */
2316 } else {
2317 addr1 += memory_region_get_ram_addr(mr);
2318 /* ROM/RAM case */
2319 ptr = qemu_get_ram_ptr(addr1);
2320 switch (type) {
2321 case WRITE_DATA:
2322 memcpy(ptr, buf, l);
2323 invalidate_and_set_dirty(addr1, l);
2324 break;
2325 case FLUSH_CACHE:
2326 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2327 break;
2330 len -= l;
2331 buf += l;
2332 addr += l;
2336 /* used for ROM loading : can write in RAM and ROM */
2337 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2338 const uint8_t *buf, int len)
2340 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2343 void cpu_flush_icache_range(hwaddr start, int len)
2346 * This function should do the same thing as an icache flush that was
2347 * triggered from within the guest. For TCG we are always cache coherent,
2348 * so there is no need to flush anything. For KVM / Xen we need to flush
2349 * the host's instruction cache at least.
2351 if (tcg_enabled()) {
2352 return;
2355 cpu_physical_memory_write_rom_internal(&address_space_memory,
2356 start, NULL, len, FLUSH_CACHE);
2359 typedef struct {
2360 MemoryRegion *mr;
2361 void *buffer;
2362 hwaddr addr;
2363 hwaddr len;
2364 } BounceBuffer;
2366 static BounceBuffer bounce;
2368 typedef struct MapClient {
2369 void *opaque;
2370 void (*callback)(void *opaque);
2371 QLIST_ENTRY(MapClient) link;
2372 } MapClient;
2374 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2375 = QLIST_HEAD_INITIALIZER(map_client_list);
2377 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2379 MapClient *client = g_malloc(sizeof(*client));
2381 client->opaque = opaque;
2382 client->callback = callback;
2383 QLIST_INSERT_HEAD(&map_client_list, client, link);
2384 return client;
2387 static void cpu_unregister_map_client(void *_client)
2389 MapClient *client = (MapClient *)_client;
2391 QLIST_REMOVE(client, link);
2392 g_free(client);
2395 static void cpu_notify_map_clients(void)
2397 MapClient *client;
2399 while (!QLIST_EMPTY(&map_client_list)) {
2400 client = QLIST_FIRST(&map_client_list);
2401 client->callback(client->opaque);
2402 cpu_unregister_map_client(client);
2406 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2408 MemoryRegion *mr;
2409 hwaddr l, xlat;
2411 while (len > 0) {
2412 l = len;
2413 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2414 if (!memory_access_is_direct(mr, is_write)) {
2415 l = memory_access_size(mr, l, addr);
2416 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2417 return false;
2421 len -= l;
2422 addr += l;
2424 return true;
2427 /* Map a physical memory region into a host virtual address.
2428 * May map a subset of the requested range, given by and returned in *plen.
2429 * May return NULL if resources needed to perform the mapping are exhausted.
2430 * Use only for reads OR writes - not for read-modify-write operations.
2431 * Use cpu_register_map_client() to know when retrying the map operation is
2432 * likely to succeed.
2434 void *address_space_map(AddressSpace *as,
2435 hwaddr addr,
2436 hwaddr *plen,
2437 bool is_write)
2439 hwaddr len = *plen;
2440 hwaddr done = 0;
2441 hwaddr l, xlat, base;
2442 MemoryRegion *mr, *this_mr;
2443 ram_addr_t raddr;
2445 if (len == 0) {
2446 return NULL;
2449 l = len;
2450 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2451 if (!memory_access_is_direct(mr, is_write)) {
2452 if (bounce.buffer) {
2453 return NULL;
2455 /* Avoid unbounded allocations */
2456 l = MIN(l, TARGET_PAGE_SIZE);
2457 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2458 bounce.addr = addr;
2459 bounce.len = l;
2461 memory_region_ref(mr);
2462 bounce.mr = mr;
2463 if (!is_write) {
2464 address_space_read(as, addr, bounce.buffer, l);
2467 *plen = l;
2468 return bounce.buffer;
2471 base = xlat;
2472 raddr = memory_region_get_ram_addr(mr);
2474 for (;;) {
2475 len -= l;
2476 addr += l;
2477 done += l;
2478 if (len == 0) {
2479 break;
2482 l = len;
2483 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2484 if (this_mr != mr || xlat != base + done) {
2485 break;
2489 memory_region_ref(mr);
2490 *plen = done;
2491 return qemu_ram_ptr_length(raddr + base, plen);
2494 /* Unmaps a memory region previously mapped by address_space_map().
2495 * Will also mark the memory as dirty if is_write == 1. access_len gives
2496 * the amount of memory that was actually read or written by the caller.
2498 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2499 int is_write, hwaddr access_len)
2501 if (buffer != bounce.buffer) {
2502 MemoryRegion *mr;
2503 ram_addr_t addr1;
2505 mr = qemu_ram_addr_from_host(buffer, &addr1);
2506 assert(mr != NULL);
2507 if (is_write) {
2508 invalidate_and_set_dirty(addr1, access_len);
2510 if (xen_enabled()) {
2511 xen_invalidate_map_cache_entry(buffer);
2513 memory_region_unref(mr);
2514 return;
2516 if (is_write) {
2517 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2519 qemu_vfree(bounce.buffer);
2520 bounce.buffer = NULL;
2521 memory_region_unref(bounce.mr);
2522 cpu_notify_map_clients();
2525 void *cpu_physical_memory_map(hwaddr addr,
2526 hwaddr *plen,
2527 int is_write)
2529 return address_space_map(&address_space_memory, addr, plen, is_write);
2532 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2533 int is_write, hwaddr access_len)
2535 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2538 /* warning: addr must be aligned */
2539 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2540 enum device_endian endian)
2542 uint8_t *ptr;
2543 uint64_t val;
2544 MemoryRegion *mr;
2545 hwaddr l = 4;
2546 hwaddr addr1;
2548 mr = address_space_translate(as, addr, &addr1, &l, false);
2549 if (l < 4 || !memory_access_is_direct(mr, false)) {
2550 /* I/O case */
2551 io_mem_read(mr, addr1, &val, 4);
2552 #if defined(TARGET_WORDS_BIGENDIAN)
2553 if (endian == DEVICE_LITTLE_ENDIAN) {
2554 val = bswap32(val);
2556 #else
2557 if (endian == DEVICE_BIG_ENDIAN) {
2558 val = bswap32(val);
2560 #endif
2561 } else {
2562 /* RAM case */
2563 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2564 & TARGET_PAGE_MASK)
2565 + addr1);
2566 switch (endian) {
2567 case DEVICE_LITTLE_ENDIAN:
2568 val = ldl_le_p(ptr);
2569 break;
2570 case DEVICE_BIG_ENDIAN:
2571 val = ldl_be_p(ptr);
2572 break;
2573 default:
2574 val = ldl_p(ptr);
2575 break;
2578 return val;
2581 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2583 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2586 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2588 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2591 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2593 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2596 /* warning: addr must be aligned */
2597 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2598 enum device_endian endian)
2600 uint8_t *ptr;
2601 uint64_t val;
2602 MemoryRegion *mr;
2603 hwaddr l = 8;
2604 hwaddr addr1;
2606 mr = address_space_translate(as, addr, &addr1, &l,
2607 false);
2608 if (l < 8 || !memory_access_is_direct(mr, false)) {
2609 /* I/O case */
2610 io_mem_read(mr, addr1, &val, 8);
2611 #if defined(TARGET_WORDS_BIGENDIAN)
2612 if (endian == DEVICE_LITTLE_ENDIAN) {
2613 val = bswap64(val);
2615 #else
2616 if (endian == DEVICE_BIG_ENDIAN) {
2617 val = bswap64(val);
2619 #endif
2620 } else {
2621 /* RAM case */
2622 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2623 & TARGET_PAGE_MASK)
2624 + addr1);
2625 switch (endian) {
2626 case DEVICE_LITTLE_ENDIAN:
2627 val = ldq_le_p(ptr);
2628 break;
2629 case DEVICE_BIG_ENDIAN:
2630 val = ldq_be_p(ptr);
2631 break;
2632 default:
2633 val = ldq_p(ptr);
2634 break;
2637 return val;
2640 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2642 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2645 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2647 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2650 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2652 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2655 /* XXX: optimize */
2656 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2658 uint8_t val;
2659 address_space_rw(as, addr, &val, 1, 0);
2660 return val;
2663 /* warning: addr must be aligned */
2664 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2665 enum device_endian endian)
2667 uint8_t *ptr;
2668 uint64_t val;
2669 MemoryRegion *mr;
2670 hwaddr l = 2;
2671 hwaddr addr1;
2673 mr = address_space_translate(as, addr, &addr1, &l,
2674 false);
2675 if (l < 2 || !memory_access_is_direct(mr, false)) {
2676 /* I/O case */
2677 io_mem_read(mr, addr1, &val, 2);
2678 #if defined(TARGET_WORDS_BIGENDIAN)
2679 if (endian == DEVICE_LITTLE_ENDIAN) {
2680 val = bswap16(val);
2682 #else
2683 if (endian == DEVICE_BIG_ENDIAN) {
2684 val = bswap16(val);
2686 #endif
2687 } else {
2688 /* RAM case */
2689 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2690 & TARGET_PAGE_MASK)
2691 + addr1);
2692 switch (endian) {
2693 case DEVICE_LITTLE_ENDIAN:
2694 val = lduw_le_p(ptr);
2695 break;
2696 case DEVICE_BIG_ENDIAN:
2697 val = lduw_be_p(ptr);
2698 break;
2699 default:
2700 val = lduw_p(ptr);
2701 break;
2704 return val;
2707 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2709 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2712 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2714 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2717 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2719 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2722 /* warning: addr must be aligned. The ram page is not masked as dirty
2723 and the code inside is not invalidated. It is useful if the dirty
2724 bits are used to track modified PTEs */
2725 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2727 uint8_t *ptr;
2728 MemoryRegion *mr;
2729 hwaddr l = 4;
2730 hwaddr addr1;
2732 mr = address_space_translate(as, addr, &addr1, &l,
2733 true);
2734 if (l < 4 || !memory_access_is_direct(mr, true)) {
2735 io_mem_write(mr, addr1, val, 4);
2736 } else {
2737 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2738 ptr = qemu_get_ram_ptr(addr1);
2739 stl_p(ptr, val);
2741 if (unlikely(in_migration)) {
2742 if (cpu_physical_memory_is_clean(addr1)) {
2743 /* invalidate code */
2744 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2745 /* set dirty bit */
2746 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2752 /* warning: addr must be aligned */
2753 static inline void stl_phys_internal(AddressSpace *as,
2754 hwaddr addr, uint32_t val,
2755 enum device_endian endian)
2757 uint8_t *ptr;
2758 MemoryRegion *mr;
2759 hwaddr l = 4;
2760 hwaddr addr1;
2762 mr = address_space_translate(as, addr, &addr1, &l,
2763 true);
2764 if (l < 4 || !memory_access_is_direct(mr, true)) {
2765 #if defined(TARGET_WORDS_BIGENDIAN)
2766 if (endian == DEVICE_LITTLE_ENDIAN) {
2767 val = bswap32(val);
2769 #else
2770 if (endian == DEVICE_BIG_ENDIAN) {
2771 val = bswap32(val);
2773 #endif
2774 io_mem_write(mr, addr1, val, 4);
2775 } else {
2776 /* RAM case */
2777 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2778 ptr = qemu_get_ram_ptr(addr1);
2779 switch (endian) {
2780 case DEVICE_LITTLE_ENDIAN:
2781 stl_le_p(ptr, val);
2782 break;
2783 case DEVICE_BIG_ENDIAN:
2784 stl_be_p(ptr, val);
2785 break;
2786 default:
2787 stl_p(ptr, val);
2788 break;
2790 invalidate_and_set_dirty(addr1, 4);
2794 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2796 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2799 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2801 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2804 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2806 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2809 /* XXX: optimize */
2810 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2812 uint8_t v = val;
2813 address_space_rw(as, addr, &v, 1, 1);
2816 /* warning: addr must be aligned */
2817 static inline void stw_phys_internal(AddressSpace *as,
2818 hwaddr addr, uint32_t val,
2819 enum device_endian endian)
2821 uint8_t *ptr;
2822 MemoryRegion *mr;
2823 hwaddr l = 2;
2824 hwaddr addr1;
2826 mr = address_space_translate(as, addr, &addr1, &l, true);
2827 if (l < 2 || !memory_access_is_direct(mr, true)) {
2828 #if defined(TARGET_WORDS_BIGENDIAN)
2829 if (endian == DEVICE_LITTLE_ENDIAN) {
2830 val = bswap16(val);
2832 #else
2833 if (endian == DEVICE_BIG_ENDIAN) {
2834 val = bswap16(val);
2836 #endif
2837 io_mem_write(mr, addr1, val, 2);
2838 } else {
2839 /* RAM case */
2840 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2841 ptr = qemu_get_ram_ptr(addr1);
2842 switch (endian) {
2843 case DEVICE_LITTLE_ENDIAN:
2844 stw_le_p(ptr, val);
2845 break;
2846 case DEVICE_BIG_ENDIAN:
2847 stw_be_p(ptr, val);
2848 break;
2849 default:
2850 stw_p(ptr, val);
2851 break;
2853 invalidate_and_set_dirty(addr1, 2);
2857 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2859 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2862 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2864 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2867 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2869 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2872 /* XXX: optimize */
2873 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2875 val = tswap64(val);
2876 address_space_rw(as, addr, (void *) &val, 8, 1);
2879 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2881 val = cpu_to_le64(val);
2882 address_space_rw(as, addr, (void *) &val, 8, 1);
2885 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2887 val = cpu_to_be64(val);
2888 address_space_rw(as, addr, (void *) &val, 8, 1);
2891 /* virtual memory access for debug (includes writing to ROM) */
2892 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2893 uint8_t *buf, int len, int is_write)
2895 int l;
2896 hwaddr phys_addr;
2897 target_ulong page;
2899 while (len > 0) {
2900 page = addr & TARGET_PAGE_MASK;
2901 phys_addr = cpu_get_phys_page_debug(cpu, page);
2902 /* if no physical page mapped, return an error */
2903 if (phys_addr == -1)
2904 return -1;
2905 l = (page + TARGET_PAGE_SIZE) - addr;
2906 if (l > len)
2907 l = len;
2908 phys_addr += (addr & ~TARGET_PAGE_MASK);
2909 if (is_write) {
2910 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2911 } else {
2912 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2914 len -= l;
2915 buf += l;
2916 addr += l;
2918 return 0;
2920 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
2936 #ifndef CONFIG_USER_ONLY
2937 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2939 MemoryRegion*mr;
2940 hwaddr l = 1;
2942 mr = address_space_translate(&address_space_memory,
2943 phys_addr, &phys_addr, &l, false);
2945 return !(memory_region_is_ram(mr) ||
2946 memory_region_is_romd(mr));
2949 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2951 RAMBlock *block;
2953 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2954 func(block->host, block->offset, block->used_length, opaque);
2957 #endif