[qemu/ar7.git] / exec.c
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
78 /* Only a portion of RAM (used_length) is actually used, and migrated.
79 * This used_length size can change across reboots.
81 #define RAM_RESIZEABLE (1 << 2)
83 #endif
85 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
86 /* current CPU in the current thread. It is only valid inside
87 cpu_exec() */
88 DEFINE_TLS(CPUState *, current_cpu);
89 /* 0 = Do not count executed instructions.
90 1 = Precise instruction counting.
91 2 = Adaptive rate instruction counting. */
92 int use_icount;
94 #if !defined(CONFIG_USER_ONLY)
96 typedef struct PhysPageEntry PhysPageEntry;
98 struct PhysPageEntry {
99     /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
100 uint32_t skip : 6;
101 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
102 uint32_t ptr : 26;
105 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
107 /* Size of the L2 (and L3, etc) page tables. */
108 #define ADDR_SPACE_BITS 64
110 #define P_L2_BITS 9
111 #define P_L2_SIZE (1 << P_L2_BITS)
113 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
115 typedef PhysPageEntry Node[P_L2_SIZE];
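/* The dispatch map is a radix tree over physical page numbers: each interior
 * Node holds P_L2_SIZE (512) PhysPageEntry slots, and with a 64-bit address
 * space and (for example) 4K target pages this yields P_L2_LEVELS == 6 levels.
 * skip == 0 marks a leaf whose ptr indexes map.sections; a non-zero skip means
 * "descend this many levels in one step" and ptr then indexes map.nodes
 * (phys_page_compact() below merges single-child chains this way).
 */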
117 typedef struct PhysPageMap {
118 unsigned sections_nb;
119 unsigned sections_nb_alloc;
120 unsigned nodes_nb;
121 unsigned nodes_nb_alloc;
122 Node *nodes;
123 MemoryRegionSection *sections;
124 } PhysPageMap;
126 struct AddressSpaceDispatch {
127 /* This is a multi-level map on the physical address space.
128 * The bottom level has pointers to MemoryRegionSections.
130 PhysPageEntry phys_map;
131 PhysPageMap map;
132 AddressSpace *as;
135 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
136 typedef struct subpage_t {
137 MemoryRegion iomem;
138 AddressSpace *as;
139 hwaddr base;
140 uint16_t sub_section[TARGET_PAGE_SIZE];
141 } subpage_t;
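/* A subpage_t covers one target page whose contents are split across several
 * MemoryRegionSections.  sub_section[] maps every byte offset within the page
 * (see SUBPAGE_IDX) to the phys_sections index that should service it; the
 * subpage_read()/subpage_write() callbacks below then forward each access to
 * the owning AddressSpace.
 */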
143 #define PHYS_SECTION_UNASSIGNED 0
144 #define PHYS_SECTION_NOTDIRTY 1
145 #define PHYS_SECTION_ROM 2
146 #define PHYS_SECTION_WATCH 3
148 static void io_mem_init(void);
149 static void memory_map_init(void);
150 static void tcg_commit(MemoryListener *listener);
152 static MemoryRegion io_mem_watch;
153 #endif
155 #if !defined(CONFIG_USER_ONLY)
157 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
159 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
160 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
161 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
162 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
166 static uint32_t phys_map_node_alloc(PhysPageMap *map)
168 unsigned i;
169 uint32_t ret;
171 ret = map->nodes_nb++;
172 assert(ret != PHYS_MAP_NODE_NIL);
173 assert(ret != map->nodes_nb_alloc);
174 for (i = 0; i < P_L2_SIZE; ++i) {
175 map->nodes[ret][i].skip = 1;
176 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
178 return ret;
181 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
182 hwaddr *index, hwaddr *nb, uint16_t leaf,
183 int level)
185 PhysPageEntry *p;
186 int i;
187 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
189 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
190 lp->ptr = phys_map_node_alloc(map);
191 p = map->nodes[lp->ptr];
192 if (level == 0) {
193 for (i = 0; i < P_L2_SIZE; i++) {
194 p[i].skip = 0;
195 p[i].ptr = PHYS_SECTION_UNASSIGNED;
198 } else {
199 p = map->nodes[lp->ptr];
201 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
203 while (*nb && lp < &p[P_L2_SIZE]) {
204 if ((*index & (step - 1)) == 0 && *nb >= step) {
205 lp->skip = 0;
206 lp->ptr = leaf;
207 *index += step;
208 *nb -= step;
209 } else {
210 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
212 ++lp;
216 static void phys_page_set(AddressSpaceDispatch *d,
217 hwaddr index, hwaddr nb,
218 uint16_t leaf)
220 /* Wildly overreserve - it doesn't matter much. */
221 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
223 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
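/* phys_page_set() installs `leaf` (a phys_sections index) for `nb` consecutive
 * pages starting at page `index`.  phys_page_set_level() works top-down:
 * whole `step`-aligned blocks are recorded directly at the current level,
 * while partial blocks recurse one level further down.
 */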
226 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
227 * and update our entry so we can skip it and go directly to the destination.
229 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
231 unsigned valid_ptr = P_L2_SIZE;
232 int valid = 0;
233 PhysPageEntry *p;
234 int i;
236 if (lp->ptr == PHYS_MAP_NODE_NIL) {
237 return;
240 p = nodes[lp->ptr];
241 for (i = 0; i < P_L2_SIZE; i++) {
242 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
243 continue;
246 valid_ptr = i;
247 valid++;
248 if (p[i].skip) {
249 phys_page_compact(&p[i], nodes, compacted);
253 /* We can only compress if there's only one child. */
254 if (valid != 1) {
255 return;
258 assert(valid_ptr < P_L2_SIZE);
260 /* Don't compress if it won't fit in the # of bits we have. */
261 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
262 return;
265 lp->ptr = p[valid_ptr].ptr;
266 if (!p[valid_ptr].skip) {
267 /* If our only child is a leaf, make this a leaf. */
268 /* By design, we should have made this node a leaf to begin with so we
269 * should never reach here.
270 * But since it's so simple to handle this, let's do it just in case we
271 * change this rule.
273 lp->skip = 0;
274 } else {
275 lp->skip += p[valid_ptr].skip;
279 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
281 DECLARE_BITMAP(compacted, nodes_nb);
283 if (d->phys_map.skip) {
284 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
288 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
289 Node *nodes, MemoryRegionSection *sections)
291 PhysPageEntry *p;
292 hwaddr index = addr >> TARGET_PAGE_BITS;
293 int i;
295 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
296 if (lp.ptr == PHYS_MAP_NODE_NIL) {
297 return &sections[PHYS_SECTION_UNASSIGNED];
299 p = nodes[lp.ptr];
300 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
303 if (sections[lp.ptr].size.hi ||
304 range_covers_byte(sections[lp.ptr].offset_within_address_space,
305 sections[lp.ptr].size.lo, addr)) {
306 return &sections[lp.ptr];
307 } else {
308 return &sections[PHYS_SECTION_UNASSIGNED];
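/* phys_page_find() resolves a single address: starting from the root it
 * consumes lp.skip levels per step, indexing each node with the next
 * P_L2_BITS bits of the page number until a leaf is reached.  The final
 * range_covers_byte() test guards against compacted leaves that span more
 * pages than the section itself; addresses outside the section fall back to
 * PHYS_SECTION_UNASSIGNED.
 */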
312 bool memory_region_is_unassigned(MemoryRegion *mr)
314 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
315 && mr != &io_mem_watch;
318 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
319 hwaddr addr,
320 bool resolve_subpage)
322 MemoryRegionSection *section;
323 subpage_t *subpage;
325 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
326 if (resolve_subpage && section->mr->subpage) {
327 subpage = container_of(section->mr, subpage_t, iomem);
328 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
330 return section;
333 static MemoryRegionSection *
334 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
335 hwaddr *plen, bool resolve_subpage)
337 MemoryRegionSection *section;
338 Int128 diff;
340 section = address_space_lookup_region(d, addr, resolve_subpage);
341 /* Compute offset within MemoryRegionSection */
342 addr -= section->offset_within_address_space;
344 /* Compute offset within MemoryRegion */
345 *xlat = addr + section->offset_within_region;
347 diff = int128_sub(section->mr->size, int128_make64(addr));
348 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
349 return section;
352 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
354 if (memory_region_is_ram(mr)) {
355 return !(is_write && mr->readonly);
357 if (memory_region_is_romd(mr)) {
358 return !is_write;
361 return false;
364 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
365 hwaddr *xlat, hwaddr *plen,
366 bool is_write)
368 IOMMUTLBEntry iotlb;
369 MemoryRegionSection *section;
370 MemoryRegion *mr;
371 hwaddr len = *plen;
373 for (;;) {
374 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
375 mr = section->mr;
377 if (!mr->iommu_ops) {
378 break;
381 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
382 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
383 | (addr & iotlb.addr_mask));
384 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
385 if (!(iotlb.perm & (1 << is_write))) {
386 mr = &io_mem_unassigned;
387 break;
390 as = iotlb.target_as;
393 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
394 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
395 len = MIN(page, len);
398 *plen = len;
399 *xlat = addr;
400 return mr;
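/* Typical use, e.g. from address_space_rw() below:
 *
 *     hwaddr xlat, len = bytes_wanted;   /* bytes_wanted: caller-chosen size */
 *     MemoryRegion *mr = address_space_translate(as, addr, &xlat, &len, is_write);
 *
 * The loop above follows IOMMU regions (mr->iommu_ops) until it reaches a
 * terminal MemoryRegion, clamping the length at every step; a failed
 * permission check yields io_mem_unassigned.  On return, *xlat is the offset
 * into mr and *plen has been reduced to the contiguously accessible span.
 */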
403 MemoryRegionSection *
404 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
405 hwaddr *plen)
407 MemoryRegionSection *section;
408 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
410 assert(!section->mr->iommu_ops);
411 return section;
413 #endif
415 void cpu_exec_init_all(void)
417 #if !defined(CONFIG_USER_ONLY)
418 qemu_mutex_init(&ram_list.mutex);
419 memory_map_init();
420 io_mem_init();
421 #endif
424 #if !defined(CONFIG_USER_ONLY)
426 static int cpu_common_post_load(void *opaque, int version_id)
428 CPUState *cpu = opaque;
430 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
431 version_id is increased. */
432 cpu->interrupt_request &= ~0x01;
433 tlb_flush(cpu, 1);
435 return 0;
438 static int cpu_common_pre_load(void *opaque)
440 CPUState *cpu = opaque;
442 cpu->exception_index = -1;
444 return 0;
447 static bool cpu_common_exception_index_needed(void *opaque)
449 CPUState *cpu = opaque;
451 return tcg_enabled() && cpu->exception_index != -1;
454 static const VMStateDescription vmstate_cpu_common_exception_index = {
455 .name = "cpu_common/exception_index",
456 .version_id = 1,
457 .minimum_version_id = 1,
458 .fields = (VMStateField[]) {
459 VMSTATE_INT32(exception_index, CPUState),
460 VMSTATE_END_OF_LIST()
464 const VMStateDescription vmstate_cpu_common = {
465 .name = "cpu_common",
466 .version_id = 1,
467 .minimum_version_id = 1,
468 .pre_load = cpu_common_pre_load,
469 .post_load = cpu_common_post_load,
470 .fields = (VMStateField[]) {
471 VMSTATE_UINT32(halted, CPUState),
472 VMSTATE_UINT32(interrupt_request, CPUState),
473 VMSTATE_END_OF_LIST()
475 .subsections = (VMStateSubsection[]) {
477 .vmsd = &vmstate_cpu_common_exception_index,
478 .needed = cpu_common_exception_index_needed,
479 } , {
480 /* empty */
485 #endif
487 CPUState *qemu_get_cpu(int index)
489 CPUState *cpu;
491 CPU_FOREACH(cpu) {
492 if (cpu->cpu_index == index) {
493 return cpu;
497 return NULL;
500 #if !defined(CONFIG_USER_ONLY)
501 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
503 /* We only support one address space per cpu at the moment. */
504 assert(cpu->as == as);
506 if (cpu->tcg_as_listener) {
507 memory_listener_unregister(cpu->tcg_as_listener);
508 } else {
509 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
511 cpu->tcg_as_listener->commit = tcg_commit;
512 memory_listener_register(cpu->tcg_as_listener, as);
514 #endif
516 void cpu_exec_init(CPUArchState *env)
518 CPUState *cpu = ENV_GET_CPU(env);
519 CPUClass *cc = CPU_GET_CLASS(cpu);
520 CPUState *some_cpu;
521 int cpu_index;
523 #if defined(CONFIG_USER_ONLY)
524 cpu_list_lock();
525 #endif
526 cpu_index = 0;
527 CPU_FOREACH(some_cpu) {
528 cpu_index++;
530 cpu->cpu_index = cpu_index;
531 cpu->numa_node = 0;
532 QTAILQ_INIT(&cpu->breakpoints);
533 QTAILQ_INIT(&cpu->watchpoints);
534 #ifndef CONFIG_USER_ONLY
535 cpu->as = &address_space_memory;
536 cpu->thread_id = qemu_get_thread_id();
537 #endif
538 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
539 #if defined(CONFIG_USER_ONLY)
540 cpu_list_unlock();
541 #endif
542 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
543 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
545 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
546 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
547 cpu_save, cpu_load, env);
548 assert(cc->vmsd == NULL);
549 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
550 #endif
551 if (cc->vmsd != NULL) {
552 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
556 #if defined(CONFIG_USER_ONLY)
557 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
559 tb_invalidate_phys_page_range(pc, pc + 1, 0);
561 #else
562 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
564 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
565 if (phys != -1) {
566 tb_invalidate_phys_addr(cpu->as,
567 phys | (pc & ~TARGET_PAGE_MASK));
570 #endif
572 #if defined(CONFIG_USER_ONLY)
573 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
578 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
579 int flags)
581 return -ENOSYS;
584 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
588 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
589 int flags, CPUWatchpoint **watchpoint)
591 return -ENOSYS;
593 #else
594 /* Add a watchpoint. */
595 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
596 int flags, CPUWatchpoint **watchpoint)
598 CPUWatchpoint *wp;
600 /* forbid ranges which are empty or run off the end of the address space */
601 if (len == 0 || (addr + len - 1) < addr) {
602 error_report("tried to set invalid watchpoint at %"
603 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
604 return -EINVAL;
606 wp = g_malloc(sizeof(*wp));
608 wp->vaddr = addr;
609 wp->len = len;
610 wp->flags = flags;
612 /* keep all GDB-injected watchpoints in front */
613 if (flags & BP_GDB) {
614 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
615 } else {
616 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
619 tlb_flush_page(cpu, addr);
621 if (watchpoint)
622 *watchpoint = wp;
623 return 0;
626 /* Remove a specific watchpoint. */
627 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
628 int flags)
630 CPUWatchpoint *wp;
632 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
633 if (addr == wp->vaddr && len == wp->len
634 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
635 cpu_watchpoint_remove_by_ref(cpu, wp);
636 return 0;
639 return -ENOENT;
642 /* Remove a specific watchpoint by reference. */
643 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
645 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
647 tlb_flush_page(cpu, watchpoint->vaddr);
649 g_free(watchpoint);
652 /* Remove all matching watchpoints. */
653 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
655 CPUWatchpoint *wp, *next;
657 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
658 if (wp->flags & mask) {
659 cpu_watchpoint_remove_by_ref(cpu, wp);
664 /* Return true if this watchpoint address matches the specified
665 * access (ie the address range covered by the watchpoint overlaps
666 * partially or completely with the address range covered by the
667 * access).
669 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
670 vaddr addr,
671 vaddr len)
673 /* We know the lengths are non-zero, but a little caution is
674 * required to avoid errors in the case where the range ends
675 * exactly at the top of the address space and so addr + len
676 * wraps round to zero.
678 vaddr wpend = wp->vaddr + wp->len - 1;
679 vaddr addrend = addr + len - 1;
681 return !(addr > wpend || wp->vaddr > addrend);
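/* Example: a 4-byte watchpoint at 0x1000 (wpend == 0x1003) overlaps a 2-byte
 * access at 0x1002 (addrend == 0x1003), since neither 0x1002 > 0x1003 nor
 * 0x1000 > 0x1003 holds.  Comparing inclusive end addresses keeps the test
 * correct even when addr + len would wrap past the top of the address space.
 */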
684 #endif
686 /* Add a breakpoint. */
687 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
688 CPUBreakpoint **breakpoint)
690 CPUBreakpoint *bp;
692 bp = g_malloc(sizeof(*bp));
694 bp->pc = pc;
695 bp->flags = flags;
697 /* keep all GDB-injected breakpoints in front */
698 if (flags & BP_GDB) {
699 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
700 } else {
701 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
704 breakpoint_invalidate(cpu, pc);
706 if (breakpoint) {
707 *breakpoint = bp;
709 return 0;
712 /* Remove a specific breakpoint. */
713 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
715 CPUBreakpoint *bp;
717 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
718 if (bp->pc == pc && bp->flags == flags) {
719 cpu_breakpoint_remove_by_ref(cpu, bp);
720 return 0;
723 return -ENOENT;
726 /* Remove a specific breakpoint by reference. */
727 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
729 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
731 breakpoint_invalidate(cpu, breakpoint->pc);
733 g_free(breakpoint);
736 /* Remove all matching breakpoints. */
737 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
739 CPUBreakpoint *bp, *next;
741 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
742 if (bp->flags & mask) {
743 cpu_breakpoint_remove_by_ref(cpu, bp);
748 /* enable or disable single step mode. EXCP_DEBUG is returned by the
749 CPU loop after each instruction */
750 void cpu_single_step(CPUState *cpu, int enabled)
752 if (cpu->singlestep_enabled != enabled) {
753 cpu->singlestep_enabled = enabled;
754 if (kvm_enabled()) {
755 kvm_update_guest_debug(cpu, 0);
756 } else {
757 /* must flush all the translated code to avoid inconsistencies */
758 /* XXX: only flush what is necessary */
759 CPUArchState *env = cpu->env_ptr;
760 tb_flush(env);
765 void cpu_abort(CPUState *cpu, const char *fmt, ...)
767 va_list ap;
768 va_list ap2;
770 va_start(ap, fmt);
771 va_copy(ap2, ap);
772 fprintf(stderr, "qemu: fatal: ");
773 vfprintf(stderr, fmt, ap);
774 fprintf(stderr, "\n");
775 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
776 if (qemu_log_enabled()) {
777 qemu_log("qemu: fatal: ");
778 qemu_log_vprintf(fmt, ap2);
779 qemu_log("\n");
780 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
781 qemu_log_flush();
782 qemu_log_close();
784 va_end(ap2);
785 va_end(ap);
786 #if defined(CONFIG_USER_ONLY)
788 struct sigaction act;
789 sigfillset(&act.sa_mask);
790 act.sa_handler = SIG_DFL;
791 sigaction(SIGABRT, &act, NULL);
793 #endif
794 abort();
797 #if !defined(CONFIG_USER_ONLY)
798 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
800 RAMBlock *block;
802 /* The list is protected by the iothread lock here. */
803 block = ram_list.mru_block;
804 if (block && addr - block->offset < block->max_length) {
805 goto found;
807 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
808 if (addr - block->offset < block->max_length) {
809 goto found;
813 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
814 abort();
816 found:
817 ram_list.mru_block = block;
818 return block;
821 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
823 ram_addr_t start1;
824 RAMBlock *block;
825 ram_addr_t end;
827 end = TARGET_PAGE_ALIGN(start + length);
828 start &= TARGET_PAGE_MASK;
830 block = qemu_get_ram_block(start);
831 assert(block == qemu_get_ram_block(end - 1));
832 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
833 cpu_tlb_reset_dirty_all(start1, length);
836 /* Note: start and end must be within the same ram block. */
837 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
838 unsigned client)
840 if (length == 0)
841 return;
842 cpu_physical_memory_clear_dirty_range_type(start, length, client);
844 if (tcg_enabled()) {
845 tlb_reset_dirty_range_all(start, length);
849 static void cpu_physical_memory_set_dirty_tracking(bool enable)
851 in_migration = enable;
854 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
855 MemoryRegionSection *section,
856 target_ulong vaddr,
857 hwaddr paddr, hwaddr xlat,
858 int prot,
859 target_ulong *address)
861 hwaddr iotlb;
862 CPUWatchpoint *wp;
864 if (memory_region_is_ram(section->mr)) {
865 /* Normal RAM. */
866 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
867 + xlat;
868 if (!section->readonly) {
869 iotlb |= PHYS_SECTION_NOTDIRTY;
870 } else {
871 iotlb |= PHYS_SECTION_ROM;
873 } else {
874 iotlb = section - section->address_space->dispatch->map.sections;
875 iotlb += xlat;
878 /* Make accesses to pages with watchpoints go via the
879 watchpoint trap routines. */
880 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
881 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
882 /* Avoid trapping reads of pages with a write breakpoint. */
883 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
884 iotlb = PHYS_SECTION_WATCH + paddr;
885 *address |= TLB_MMIO;
886 break;
891 return iotlb;
893 #endif /* defined(CONFIG_USER_ONLY) */
895 #if !defined(CONFIG_USER_ONLY)
897 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
898 uint16_t section);
899 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
901 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
902 qemu_anon_ram_alloc;
905  * Set a custom physical guest memory allocator.
906 * Accelerators with unusual needs may need this. Hopefully, we can
907 * get rid of it eventually.
909 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
911 phys_mem_alloc = alloc;
914 static uint16_t phys_section_add(PhysPageMap *map,
915 MemoryRegionSection *section)
917 /* The physical section number is ORed with a page-aligned
918 * pointer to produce the iotlb entries. Thus it should
919 * never overflow into the page-aligned value.
921 assert(map->sections_nb < TARGET_PAGE_SIZE);
923 if (map->sections_nb == map->sections_nb_alloc) {
924 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
925 map->sections = g_renew(MemoryRegionSection, map->sections,
926 map->sections_nb_alloc);
928 map->sections[map->sections_nb] = *section;
929 memory_region_ref(section->mr);
930 return map->sections_nb++;
933 static void phys_section_destroy(MemoryRegion *mr)
935 memory_region_unref(mr);
937 if (mr->subpage) {
938 subpage_t *subpage = container_of(mr, subpage_t, iomem);
939 object_unref(OBJECT(&subpage->iomem));
940 g_free(subpage);
944 static void phys_sections_free(PhysPageMap *map)
946 while (map->sections_nb > 0) {
947 MemoryRegionSection *section = &map->sections[--map->sections_nb];
948 phys_section_destroy(section->mr);
950 g_free(map->sections);
951 g_free(map->nodes);
954 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
956 subpage_t *subpage;
957 hwaddr base = section->offset_within_address_space
958 & TARGET_PAGE_MASK;
959 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
960 d->map.nodes, d->map.sections);
961 MemoryRegionSection subsection = {
962 .offset_within_address_space = base,
963 .size = int128_make64(TARGET_PAGE_SIZE),
965 hwaddr start, end;
967 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
969 if (!(existing->mr->subpage)) {
970 subpage = subpage_init(d->as, base);
971 subsection.address_space = d->as;
972 subsection.mr = &subpage->iomem;
973 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
974 phys_section_add(&d->map, &subsection));
975 } else {
976 subpage = container_of(existing->mr, subpage_t, iomem);
978 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
979 end = start + int128_get64(section->size) - 1;
980 subpage_register(subpage, start, end,
981 phys_section_add(&d->map, section));
985 static void register_multipage(AddressSpaceDispatch *d,
986 MemoryRegionSection *section)
988 hwaddr start_addr = section->offset_within_address_space;
989 uint16_t section_index = phys_section_add(&d->map, section);
990 uint64_t num_pages = int128_get64(int128_rshift(section->size,
991 TARGET_PAGE_BITS));
993 assert(num_pages);
994 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
997 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
999 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1000 AddressSpaceDispatch *d = as->next_dispatch;
1001 MemoryRegionSection now = *section, remain = *section;
1002 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1004 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1005 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1006 - now.offset_within_address_space;
1008 now.size = int128_min(int128_make64(left), now.size);
1009 register_subpage(d, &now);
1010 } else {
1011 now.size = int128_zero();
1013 while (int128_ne(remain.size, now.size)) {
1014 remain.size = int128_sub(remain.size, now.size);
1015 remain.offset_within_address_space += int128_get64(now.size);
1016 remain.offset_within_region += int128_get64(now.size);
1017 now = remain;
1018 if (int128_lt(remain.size, page_size)) {
1019 register_subpage(d, &now);
1020 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1021 now.size = page_size;
1022 register_subpage(d, &now);
1023 } else {
1024 now.size = int128_and(now.size, int128_neg(page_size));
1025 register_multipage(d, &now);
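/* mem_add() registers one MemoryRegionSection with the dispatch map being
 * built, splitting it into at most three pieces: a subpage for an unaligned
 * head, register_multipage() for the page-aligned middle, and a subpage for
 * any unaligned tail.  register_subpage()/register_multipage() above perform
 * the actual phys_page_set() calls.
 */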
1030 void qemu_flush_coalesced_mmio_buffer(void)
1032 if (kvm_enabled())
1033 kvm_flush_coalesced_mmio_buffer();
1036 void qemu_mutex_lock_ramlist(void)
1038 qemu_mutex_lock(&ram_list.mutex);
1041 void qemu_mutex_unlock_ramlist(void)
1043 qemu_mutex_unlock(&ram_list.mutex);
1046 #ifdef __linux__
1048 #include <sys/vfs.h>
1050 #define HUGETLBFS_MAGIC 0x958458f6
1052 static long gethugepagesize(const char *path, Error **errp)
1054 struct statfs fs;
1055 int ret;
1057 do {
1058 ret = statfs(path, &fs);
1059 } while (ret != 0 && errno == EINTR);
1061 if (ret != 0) {
1062 error_setg_errno(errp, errno, "failed to get page size of file %s",
1063 path);
1064 return 0;
1067 if (fs.f_type != HUGETLBFS_MAGIC)
1068 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1070 return fs.f_bsize;
1073 static void *file_ram_alloc(RAMBlock *block,
1074 ram_addr_t memory,
1075 const char *path,
1076 Error **errp)
1078 char *filename;
1079 char *sanitized_name;
1080 char *c;
1081 void *area = NULL;
1082 int fd;
1083 uint64_t hpagesize;
1084 Error *local_err = NULL;
1086 hpagesize = gethugepagesize(path, &local_err);
1087 if (local_err) {
1088 error_propagate(errp, local_err);
1089 goto error;
1091 block->mr->align = hpagesize;
1093 if (memory < hpagesize) {
1094 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1095 "or larger than huge page size 0x%" PRIx64,
1096 memory, hpagesize);
1097 goto error;
1100 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1101 error_setg(errp,
1102 "host lacks kvm mmu notifiers, -mem-path unsupported");
1103 goto error;
1106 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1107 sanitized_name = g_strdup(memory_region_name(block->mr));
1108 for (c = sanitized_name; *c != '\0'; c++) {
1109 if (*c == '/')
1110 *c = '_';
1113 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1114 sanitized_name);
1115 g_free(sanitized_name);
1117 fd = mkstemp(filename);
1118 if (fd < 0) {
1119 error_setg_errno(errp, errno,
1120 "unable to create backing store for hugepages");
1121 g_free(filename);
1122 goto error;
1124 unlink(filename);
1125 g_free(filename);
1127 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1130 * ftruncate is not supported by hugetlbfs in older
1131 * hosts, so don't bother bailing out on errors.
1132 * If anything goes wrong with it under other filesystems,
1133 * mmap will fail.
1135 if (ftruncate(fd, memory)) {
1136 perror("ftruncate");
1139 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1140 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1141 fd, 0);
1142 if (area == MAP_FAILED) {
1143 error_setg_errno(errp, errno,
1144 "unable to map backing store for hugepages");
1145 close(fd);
1146 goto error;
1149 if (mem_prealloc) {
1150 os_mem_prealloc(fd, area, memory);
1153 block->fd = fd;
1154 return area;
1156 error:
1157 if (mem_prealloc) {
1158 error_report("%s\n", error_get_pretty(*errp));
1159 exit(1);
1161 return NULL;
1163 #endif
1165 static ram_addr_t find_ram_offset(ram_addr_t size)
1167 RAMBlock *block, *next_block;
1168 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1170 assert(size != 0); /* it would hand out same offset multiple times */
1172 if (QTAILQ_EMPTY(&ram_list.blocks))
1173 return 0;
1175 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1176 ram_addr_t end, next = RAM_ADDR_MAX;
1178 end = block->offset + block->max_length;
1180 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1181 if (next_block->offset >= end) {
1182 next = MIN(next, next_block->offset);
1185 if (next - end >= size && next - end < mingap) {
1186 offset = end;
1187 mingap = next - end;
1191 if (offset == RAM_ADDR_MAX) {
1192 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1193 (uint64_t)size);
1194 abort();
1197 return offset;
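/* find_ram_offset() assigns a ram_addr_t to a new block by best-fit search:
 * for each existing block it measures the gap up to the next block and keeps
 * the smallest gap that still holds `size`, which limits fragmentation of the
 * ram_addr_t space.
 */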
1200 ram_addr_t last_ram_offset(void)
1202 RAMBlock *block;
1203 ram_addr_t last = 0;
1205 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1206 last = MAX(last, block->offset + block->max_length);
1208 return last;
1211 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1213 int ret;
1215 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1216 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1217 "dump-guest-core", true)) {
1218 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1219 if (ret) {
1220 perror("qemu_madvise");
1221 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1222 "but dump_guest_core=off specified\n");
1227 static RAMBlock *find_ram_block(ram_addr_t addr)
1229 RAMBlock *block;
1231 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1232 if (block->offset == addr) {
1233 return block;
1237 return NULL;
1240 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1242 RAMBlock *new_block = find_ram_block(addr);
1243 RAMBlock *block;
1245 assert(new_block);
1246 assert(!new_block->idstr[0]);
1248 if (dev) {
1249 char *id = qdev_get_dev_path(dev);
1250 if (id) {
1251 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1252 g_free(id);
1255 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1257 /* This assumes the iothread lock is taken here too. */
1258 qemu_mutex_lock_ramlist();
1259 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1260 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1261 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1262 new_block->idstr);
1263 abort();
1266 qemu_mutex_unlock_ramlist();
1269 void qemu_ram_unset_idstr(ram_addr_t addr)
1271 RAMBlock *block = find_ram_block(addr);
1273 if (block) {
1274 memset(block->idstr, 0, sizeof(block->idstr));
1278 static int memory_try_enable_merging(void *addr, size_t len)
1280 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1281 /* disabled by the user */
1282 return 0;
1285 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1288 /* Only legal before guest might have detected the memory size: e.g. on
1289 * incoming migration, or right after reset.
1291  * As the memory core doesn't know how the memory is accessed, it is up to the
1292 * resize callback to update device state and/or add assertions to detect
1293 * misuse, if necessary.
1295 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1297 RAMBlock *block = find_ram_block(base);
1299 assert(block);
1301 if (block->used_length == newsize) {
1302 return 0;
1305 if (!(block->flags & RAM_RESIZEABLE)) {
1306 error_setg_errno(errp, EINVAL,
1307 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1308 " in != 0x" RAM_ADDR_FMT, block->idstr,
1309 newsize, block->used_length);
1310 return -EINVAL;
1313 if (block->max_length < newsize) {
1314 error_setg_errno(errp, EINVAL,
1315 "Length too large: %s: 0x" RAM_ADDR_FMT
1316 " > 0x" RAM_ADDR_FMT, block->idstr,
1317 newsize, block->max_length);
1318 return -EINVAL;
1321 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1322 block->used_length = newsize;
1323 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1324 memory_region_set_size(block->mr, newsize);
1325 if (block->resized) {
1326 block->resized(block->idstr, newsize, block->host);
1328 return 0;
1331 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1333 RAMBlock *block;
1334 ram_addr_t old_ram_size, new_ram_size;
1336 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1338 /* This assumes the iothread lock is taken here too. */
1339 qemu_mutex_lock_ramlist();
1340 new_block->offset = find_ram_offset(new_block->max_length);
1342 if (!new_block->host) {
1343 if (xen_enabled()) {
1344 xen_ram_alloc(new_block->offset, new_block->max_length,
1345 new_block->mr);
1346 } else {
1347 new_block->host = phys_mem_alloc(new_block->max_length,
1348 &new_block->mr->align);
1349 if (!new_block->host) {
1350 error_setg_errno(errp, errno,
1351 "cannot set up guest memory '%s'",
1352 memory_region_name(new_block->mr));
1353 qemu_mutex_unlock_ramlist();
1354 return -1;
1356 memory_try_enable_merging(new_block->host, new_block->max_length);
1360 /* Keep the list sorted from biggest to smallest block. */
1361 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1362 if (block->max_length < new_block->max_length) {
1363 break;
1366 if (block) {
1367 QTAILQ_INSERT_BEFORE(block, new_block, next);
1368 } else {
1369 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1371 ram_list.mru_block = NULL;
1373 ram_list.version++;
1374 qemu_mutex_unlock_ramlist();
1376 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1378 if (new_ram_size > old_ram_size) {
1379 int i;
1380 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1381 ram_list.dirty_memory[i] =
1382 bitmap_zero_extend(ram_list.dirty_memory[i],
1383 old_ram_size, new_ram_size);
1386 cpu_physical_memory_set_dirty_range(new_block->offset,
1387 new_block->used_length);
1389 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1390 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1391 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1393 if (kvm_enabled()) {
1394 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1397 return new_block->offset;
1400 #ifdef __linux__
1401 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1402 bool share, const char *mem_path,
1403 Error **errp)
1405 RAMBlock *new_block;
1406 ram_addr_t addr;
1407 Error *local_err = NULL;
1409 if (xen_enabled()) {
1410 error_setg(errp, "-mem-path not supported with Xen");
1411 return -1;
1414 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1416 * file_ram_alloc() needs to allocate just like
1417 * phys_mem_alloc, but we haven't bothered to provide
1418 * a hook there.
1420 error_setg(errp,
1421 "-mem-path not supported with this accelerator");
1422 return -1;
1425 size = TARGET_PAGE_ALIGN(size);
1426 new_block = g_malloc0(sizeof(*new_block));
1427 new_block->mr = mr;
1428 new_block->used_length = size;
1429 new_block->max_length = size;
1430 new_block->flags = share ? RAM_SHARED : 0;
1431 new_block->host = file_ram_alloc(new_block, size,
1432 mem_path, errp);
1433 if (!new_block->host) {
1434 g_free(new_block);
1435 return -1;
1438 addr = ram_block_add(new_block, &local_err);
1439 if (local_err) {
1440 g_free(new_block);
1441 error_propagate(errp, local_err);
1442 return -1;
1444 return addr;
1446 #endif
1448 static
1449 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1450 void (*resized)(const char*,
1451 uint64_t length,
1452 void *host),
1453 void *host, bool resizeable,
1454 MemoryRegion *mr, Error **errp)
1456 RAMBlock *new_block;
1457 ram_addr_t addr;
1458 Error *local_err = NULL;
1460 size = TARGET_PAGE_ALIGN(size);
1461 max_size = TARGET_PAGE_ALIGN(max_size);
1462 new_block = g_malloc0(sizeof(*new_block));
1463 new_block->mr = mr;
1464 new_block->resized = resized;
1465 new_block->used_length = size;
1466 new_block->max_length = max_size;
1467 assert(max_size >= size);
1468 new_block->fd = -1;
1469 new_block->host = host;
1470 if (host) {
1471 new_block->flags |= RAM_PREALLOC;
1473 if (resizeable) {
1474 new_block->flags |= RAM_RESIZEABLE;
1476 addr = ram_block_add(new_block, &local_err);
1477 if (local_err) {
1478 g_free(new_block);
1479 error_propagate(errp, local_err);
1480 return -1;
1482 return addr;
1485 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1486 MemoryRegion *mr, Error **errp)
1488 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1491 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1493 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1496 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1497 void (*resized)(const char*,
1498 uint64_t length,
1499 void *host),
1500 MemoryRegion *mr, Error **errp)
1502 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1505 void qemu_ram_free_from_ptr(ram_addr_t addr)
1507 RAMBlock *block;
1509 /* This assumes the iothread lock is taken here too. */
1510 qemu_mutex_lock_ramlist();
1511 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1512 if (addr == block->offset) {
1513 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1514 ram_list.mru_block = NULL;
1515 ram_list.version++;
1516 g_free(block);
1517 break;
1520 qemu_mutex_unlock_ramlist();
1523 void qemu_ram_free(ram_addr_t addr)
1525 RAMBlock *block;
1527 /* This assumes the iothread lock is taken here too. */
1528 qemu_mutex_lock_ramlist();
1529 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1530 if (addr == block->offset) {
1531 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1532 ram_list.mru_block = NULL;
1533 ram_list.version++;
1534 if (block->flags & RAM_PREALLOC) {
1536 } else if (xen_enabled()) {
1537 xen_invalidate_map_cache_entry(block->host);
1538 #ifndef _WIN32
1539 } else if (block->fd >= 0) {
1540 munmap(block->host, block->max_length);
1541 close(block->fd);
1542 #endif
1543 } else {
1544 qemu_anon_ram_free(block->host, block->max_length);
1546 g_free(block);
1547 break;
1550 qemu_mutex_unlock_ramlist();
1554 #ifndef _WIN32
1555 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1557 RAMBlock *block;
1558 ram_addr_t offset;
1559 int flags;
1560 void *area, *vaddr;
1562 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1563 offset = addr - block->offset;
1564 if (offset < block->max_length) {
1565 vaddr = ramblock_ptr(block, offset);
1566 if (block->flags & RAM_PREALLOC) {
1568 } else if (xen_enabled()) {
1569 abort();
1570 } else {
1571 flags = MAP_FIXED;
1572 munmap(vaddr, length);
1573 if (block->fd >= 0) {
1574 flags |= (block->flags & RAM_SHARED ?
1575 MAP_SHARED : MAP_PRIVATE);
1576 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1577 flags, block->fd, offset);
1578 } else {
1580 * Remap needs to match alloc. Accelerators that
1581 * set phys_mem_alloc never remap. If they did,
1582 * we'd need a remap hook here.
1584 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1586 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1587 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1588 flags, -1, 0);
1590 if (area != vaddr) {
1591 fprintf(stderr, "Could not remap addr: "
1592 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1593 length, addr);
1594 exit(1);
1596 memory_try_enable_merging(vaddr, length);
1597 qemu_ram_setup_dump(vaddr, length);
1599 return;
1603 #endif /* !_WIN32 */
1605 int qemu_get_ram_fd(ram_addr_t addr)
1607 RAMBlock *block = qemu_get_ram_block(addr);
1609 return block->fd;
1612 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1614 RAMBlock *block = qemu_get_ram_block(addr);
1616 return ramblock_ptr(block, 0);
1619 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1620 With the exception of the softmmu code in this file, this should
1621 only be used for local memory (e.g. video ram) that the device owns,
1622 and knows it isn't going to access beyond the end of the block.
1624 It should not be used for general purpose DMA.
1625 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1627 void *qemu_get_ram_ptr(ram_addr_t addr)
1629 RAMBlock *block = qemu_get_ram_block(addr);
1631 if (xen_enabled()) {
1632 /* We need to check if the requested address is in the RAM
1633 * because we don't want to map the entire memory in QEMU.
1634 * In that case just map until the end of the page.
1636 if (block->offset == 0) {
1637 return xen_map_cache(addr, 0, 0);
1638 } else if (block->host == NULL) {
1639 block->host =
1640 xen_map_cache(block->offset, block->max_length, 1);
1643 return ramblock_ptr(block, addr - block->offset);
1646 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1647 * but takes a size argument */
1648 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1650 if (*size == 0) {
1651 return NULL;
1653 if (xen_enabled()) {
1654 return xen_map_cache(addr, *size, 1);
1655 } else {
1656 RAMBlock *block;
1658 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1659 if (addr - block->offset < block->max_length) {
1660 if (addr - block->offset + *size > block->max_length)
1661 *size = block->max_length - addr + block->offset;
1662 return ramblock_ptr(block, addr - block->offset);
1666 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1667 abort();
1671 /* Some of the softmmu routines need to translate from a host pointer
1672 (typically a TLB entry) back to a ram offset. */
1673 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1675 RAMBlock *block;
1676 uint8_t *host = ptr;
1678 if (xen_enabled()) {
1679 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1680 return qemu_get_ram_block(*ram_addr)->mr;
1683 block = ram_list.mru_block;
1684 if (block && block->host && host - block->host < block->max_length) {
1685 goto found;
1688 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1689         /* This case appears when the block is not mapped. */
1690 if (block->host == NULL) {
1691 continue;
1693 if (host - block->host < block->max_length) {
1694 goto found;
1698 return NULL;
1700 found:
1701 *ram_addr = block->offset + (host - block->host);
1702 return block->mr;
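/* qemu_ram_addr_from_host() is the inverse of qemu_get_ram_ptr(): given a
 * host pointer into some RAMBlock it finds the block (trying the MRU block
 * first), stores the corresponding ram_addr_t through *ram_addr and returns
 * the owning MemoryRegion, or NULL if the pointer lies in no mapped block.
 */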
1705 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1706 uint64_t val, unsigned size)
1708 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1709 tb_invalidate_phys_page_fast(ram_addr, size);
1711 switch (size) {
1712 case 1:
1713 stb_p(qemu_get_ram_ptr(ram_addr), val);
1714 break;
1715 case 2:
1716 stw_p(qemu_get_ram_ptr(ram_addr), val);
1717 break;
1718 case 4:
1719 stl_p(qemu_get_ram_ptr(ram_addr), val);
1720 break;
1721 default:
1722 abort();
1724 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1725 /* we remove the notdirty callback only if the code has been
1726 flushed */
1727 if (!cpu_physical_memory_is_clean(ram_addr)) {
1728 CPUArchState *env = current_cpu->env_ptr;
1729 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1733 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1734 unsigned size, bool is_write)
1736 return is_write;
1739 static const MemoryRegionOps notdirty_mem_ops = {
1740 .write = notdirty_mem_write,
1741 .valid.accepts = notdirty_mem_accepts,
1742 .endianness = DEVICE_NATIVE_ENDIAN,
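/* Stores land in notdirty_mem_write() only while the page's dirty bits are
 * clear: the TLB entry is set up so writes trap here, any TBs on the page are
 * invalidated, the store is performed on the RAM backing, and the dirty
 * bitmap is updated.  Once the page is dirty again, tlb_set_dirty() restores
 * direct RAM writes so this callback is no longer taken.
 */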
1745 /* Generate a debug exception if a watchpoint has been hit. */
1746 static void check_watchpoint(int offset, int len, int flags)
1748 CPUState *cpu = current_cpu;
1749 CPUArchState *env = cpu->env_ptr;
1750 target_ulong pc, cs_base;
1751 target_ulong vaddr;
1752 CPUWatchpoint *wp;
1753 int cpu_flags;
1755 if (cpu->watchpoint_hit) {
1756 /* We re-entered the check after replacing the TB. Now raise
1757          * the debug interrupt so that it will trigger after the
1758 * current instruction. */
1759 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1760 return;
1762 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1763 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1764 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1765 && (wp->flags & flags)) {
1766 if (flags == BP_MEM_READ) {
1767 wp->flags |= BP_WATCHPOINT_HIT_READ;
1768 } else {
1769 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1771 wp->hitaddr = vaddr;
1772 if (!cpu->watchpoint_hit) {
1773 cpu->watchpoint_hit = wp;
1774 tb_check_watchpoint(cpu);
1775 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1776 cpu->exception_index = EXCP_DEBUG;
1777 cpu_loop_exit(cpu);
1778 } else {
1779 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1780 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1781 cpu_resume_from_signal(cpu, NULL);
1784 } else {
1785 wp->flags &= ~BP_WATCHPOINT_HIT;
1790 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1791 so these check for a hit then pass through to the normal out-of-line
1792 phys routines. */
1793 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1794 unsigned size)
1796 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1797 switch (size) {
1798 case 1: return ldub_phys(&address_space_memory, addr);
1799 case 2: return lduw_phys(&address_space_memory, addr);
1800 case 4: return ldl_phys(&address_space_memory, addr);
1801 default: abort();
1805 static void watch_mem_write(void *opaque, hwaddr addr,
1806 uint64_t val, unsigned size)
1808 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1809 switch (size) {
1810 case 1:
1811 stb_phys(&address_space_memory, addr, val);
1812 break;
1813 case 2:
1814 stw_phys(&address_space_memory, addr, val);
1815 break;
1816 case 4:
1817 stl_phys(&address_space_memory, addr, val);
1818 break;
1819 default: abort();
1823 static const MemoryRegionOps watch_mem_ops = {
1824 .read = watch_mem_read,
1825 .write = watch_mem_write,
1826 .endianness = DEVICE_NATIVE_ENDIAN,
1829 static uint64_t subpage_read(void *opaque, hwaddr addr,
1830 unsigned len)
1832 subpage_t *subpage = opaque;
1833 uint8_t buf[8];
1835 #if defined(DEBUG_SUBPAGE)
1836 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1837 subpage, len, addr);
1838 #endif
1839 address_space_read(subpage->as, addr + subpage->base, buf, len);
1840 switch (len) {
1841 case 1:
1842 return ldub_p(buf);
1843 case 2:
1844 return lduw_p(buf);
1845 case 4:
1846 return ldl_p(buf);
1847 case 8:
1848 return ldq_p(buf);
1849 default:
1850 abort();
1854 static void subpage_write(void *opaque, hwaddr addr,
1855 uint64_t value, unsigned len)
1857 subpage_t *subpage = opaque;
1858 uint8_t buf[8];
1860 #if defined(DEBUG_SUBPAGE)
1861 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1862 " value %"PRIx64"\n",
1863 __func__, subpage, len, addr, value);
1864 #endif
1865 switch (len) {
1866 case 1:
1867 stb_p(buf, value);
1868 break;
1869 case 2:
1870 stw_p(buf, value);
1871 break;
1872 case 4:
1873 stl_p(buf, value);
1874 break;
1875 case 8:
1876 stq_p(buf, value);
1877 break;
1878 default:
1879 abort();
1881 address_space_write(subpage->as, addr + subpage->base, buf, len);
1884 static bool subpage_accepts(void *opaque, hwaddr addr,
1885 unsigned len, bool is_write)
1887 subpage_t *subpage = opaque;
1888 #if defined(DEBUG_SUBPAGE)
1889 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1890 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1891 #endif
1893 return address_space_access_valid(subpage->as, addr + subpage->base,
1894 len, is_write);
1897 static const MemoryRegionOps subpage_ops = {
1898 .read = subpage_read,
1899 .write = subpage_write,
1900 .impl.min_access_size = 1,
1901 .impl.max_access_size = 8,
1902 .valid.min_access_size = 1,
1903 .valid.max_access_size = 8,
1904 .valid.accepts = subpage_accepts,
1905 .endianness = DEVICE_NATIVE_ENDIAN,
1908 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1909 uint16_t section)
1911 int idx, eidx;
1913 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1914 return -1;
1915 idx = SUBPAGE_IDX(start);
1916 eidx = SUBPAGE_IDX(end);
1917 #if defined(DEBUG_SUBPAGE)
1918 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1919 __func__, mmio, start, end, idx, eidx, section);
1920 #endif
1921 for (; idx <= eidx; idx++) {
1922 mmio->sub_section[idx] = section;
1925 return 0;
1928 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1930 subpage_t *mmio;
1932 mmio = g_malloc0(sizeof(subpage_t));
1934 mmio->as = as;
1935 mmio->base = base;
1936 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1937 NULL, TARGET_PAGE_SIZE);
1938 mmio->iomem.subpage = true;
1939 #if defined(DEBUG_SUBPAGE)
1940 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1941 mmio, base, TARGET_PAGE_SIZE);
1942 #endif
1943 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1945 return mmio;
1948 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1949 MemoryRegion *mr)
1951 assert(as);
1952 MemoryRegionSection section = {
1953 .address_space = as,
1954 .mr = mr,
1955 .offset_within_address_space = 0,
1956 .offset_within_region = 0,
1957 .size = int128_2_64(),
1960 return phys_section_add(map, &section);
1963 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1965 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1968 static void io_mem_init(void)
1970 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1971 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1972 NULL, UINT64_MAX);
1973 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1974 NULL, UINT64_MAX);
1975 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1976 NULL, UINT64_MAX);
1979 static void mem_begin(MemoryListener *listener)
1981 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1982 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1983 uint16_t n;
1985 n = dummy_section(&d->map, as, &io_mem_unassigned);
1986 assert(n == PHYS_SECTION_UNASSIGNED);
1987 n = dummy_section(&d->map, as, &io_mem_notdirty);
1988 assert(n == PHYS_SECTION_NOTDIRTY);
1989 n = dummy_section(&d->map, as, &io_mem_rom);
1990 assert(n == PHYS_SECTION_ROM);
1991 n = dummy_section(&d->map, as, &io_mem_watch);
1992 assert(n == PHYS_SECTION_WATCH);
1994 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1995 d->as = as;
1996 as->next_dispatch = d;
1999 static void mem_commit(MemoryListener *listener)
2001 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2002 AddressSpaceDispatch *cur = as->dispatch;
2003 AddressSpaceDispatch *next = as->next_dispatch;
2005 phys_page_compact_all(next, next->map.nodes_nb);
2007 as->dispatch = next;
2009 if (cur) {
2010 phys_sections_free(&cur->map);
2011 g_free(cur);
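/* Dispatch maps are rebuilt in two phases: mem_begin() allocates a fresh
 * AddressSpaceDispatch whose first four sections are the fixed
 * unassigned/notdirty/rom/watch entries, mem_add() populates it for every
 * section of the new memory view, and mem_commit() compacts the tree, swaps
 * it in and frees the previous map.
 */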
2015 static void tcg_commit(MemoryListener *listener)
2017 CPUState *cpu;
2019 /* since each CPU stores ram addresses in its TLB cache, we must
2020 reset the modified entries */
2021 /* XXX: slow ! */
2022 CPU_FOREACH(cpu) {
2023 /* FIXME: Disentangle the cpu.h circular files deps so we can
2024 directly get the right CPU from listener. */
2025 if (cpu->tcg_as_listener != listener) {
2026 continue;
2028 tlb_flush(cpu, 1);
2032 static void core_log_global_start(MemoryListener *listener)
2034 cpu_physical_memory_set_dirty_tracking(true);
2037 static void core_log_global_stop(MemoryListener *listener)
2039 cpu_physical_memory_set_dirty_tracking(false);
2042 static MemoryListener core_memory_listener = {
2043 .log_global_start = core_log_global_start,
2044 .log_global_stop = core_log_global_stop,
2045 .priority = 1,
2048 void address_space_init_dispatch(AddressSpace *as)
2050 as->dispatch = NULL;
2051 as->dispatch_listener = (MemoryListener) {
2052 .begin = mem_begin,
2053 .commit = mem_commit,
2054 .region_add = mem_add,
2055 .region_nop = mem_add,
2056 .priority = 0,
2058 memory_listener_register(&as->dispatch_listener, as);
2061 void address_space_destroy_dispatch(AddressSpace *as)
2063 AddressSpaceDispatch *d = as->dispatch;
2065 memory_listener_unregister(&as->dispatch_listener);
2066 g_free(d);
2067 as->dispatch = NULL;
2070 static void memory_map_init(void)
2072 system_memory = g_malloc(sizeof(*system_memory));
2074 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2075 address_space_init(&address_space_memory, system_memory, "memory");
2077 system_io = g_malloc(sizeof(*system_io));
2078 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2079 65536);
2080 address_space_init(&address_space_io, system_io, "I/O");
2082 memory_listener_register(&core_memory_listener, &address_space_memory);
2085 MemoryRegion *get_system_memory(void)
2087 return system_memory;
2090 MemoryRegion *get_system_io(void)
2092 return system_io;
2095 #endif /* !defined(CONFIG_USER_ONLY) */
2097 /* physical memory access (slow version, mainly for debug) */
2098 #if defined(CONFIG_USER_ONLY)
2099 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2100 uint8_t *buf, int len, int is_write)
2102 int l, flags;
2103 target_ulong page;
2104 void * p;
2106 while (len > 0) {
2107 page = addr & TARGET_PAGE_MASK;
2108 l = (page + TARGET_PAGE_SIZE) - addr;
2109 if (l > len)
2110 l = len;
2111 flags = page_get_flags(page);
2112 if (!(flags & PAGE_VALID))
2113 return -1;
2114 if (is_write) {
2115 if (!(flags & PAGE_WRITE))
2116 return -1;
2117 /* XXX: this code should not depend on lock_user */
2118 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2119 return -1;
2120 memcpy(p, buf, l);
2121 unlock_user(p, addr, l);
2122 } else {
2123 if (!(flags & PAGE_READ))
2124 return -1;
2125 /* XXX: this code should not depend on lock_user */
2126 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2127 return -1;
2128 memcpy(buf, p, l);
2129 unlock_user(p, addr, 0);
2131 len -= l;
2132 buf += l;
2133 addr += l;
2135 return 0;
2138 #else
2140 static void invalidate_and_set_dirty(hwaddr addr,
2141 hwaddr length)
2143 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2144 tb_invalidate_phys_range(addr, addr + length, 0);
2145 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2147 xen_modified_memory(addr, length);
2150 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2152 unsigned access_size_max = mr->ops->valid.max_access_size;
2154 /* Regions are assumed to support 1-4 byte accesses unless
2155 otherwise specified. */
2156 if (access_size_max == 0) {
2157 access_size_max = 4;
2160 /* Bound the maximum access by the alignment of the address. */
2161 if (!mr->ops->impl.unaligned) {
2162 unsigned align_size_max = addr & -addr;
2163 if (align_size_max != 0 && align_size_max < access_size_max) {
2164 access_size_max = align_size_max;
2168 /* Don't attempt accesses larger than the maximum. */
2169 if (l > access_size_max) {
2170 l = access_size_max;
2172 if (l & (l - 1)) {
2173 l = 1 << (qemu_fls(l) - 1);
2176 return l;
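/* Example: for an MMIO region with valid.max_access_size == 4 and no
 * impl.unaligned support, a 6-byte access at address 0x1002 is bounded by the
 * alignment (0x1002 & -0x1002 == 2), so it is issued as a 2-byte access and
 * address_space_rw() loops for the remainder.
 */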
2179 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2180 int len, bool is_write)
2182 hwaddr l;
2183 uint8_t *ptr;
2184 uint64_t val;
2185 hwaddr addr1;
2186 MemoryRegion *mr;
2187 bool error = false;
2189 while (len > 0) {
2190 l = len;
2191 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2193 if (is_write) {
2194 if (!memory_access_is_direct(mr, is_write)) {
2195 l = memory_access_size(mr, l, addr1);
2196 /* XXX: could force current_cpu to NULL to avoid
2197 potential bugs */
2198 switch (l) {
2199 case 8:
2200 /* 64 bit write access */
2201 val = ldq_p(buf);
2202 error |= io_mem_write(mr, addr1, val, 8);
2203 break;
2204 case 4:
2205 /* 32 bit write access */
2206 val = ldl_p(buf);
2207 error |= io_mem_write(mr, addr1, val, 4);
2208 break;
2209 case 2:
2210 /* 16 bit write access */
2211 val = lduw_p(buf);
2212 error |= io_mem_write(mr, addr1, val, 2);
2213 break;
2214 case 1:
2215 /* 8 bit write access */
2216 val = ldub_p(buf);
2217 error |= io_mem_write(mr, addr1, val, 1);
2218 break;
2219 default:
2220 abort();
2222 } else {
2223 addr1 += memory_region_get_ram_addr(mr);
2224 /* RAM case */
2225 ptr = qemu_get_ram_ptr(addr1);
2226 memcpy(ptr, buf, l);
2227 invalidate_and_set_dirty(addr1, l);
2229 } else {
2230 if (!memory_access_is_direct(mr, is_write)) {
2231 /* I/O case */
2232 l = memory_access_size(mr, l, addr1);
2233 switch (l) {
2234 case 8:
2235 /* 64 bit read access */
2236 error |= io_mem_read(mr, addr1, &val, 8);
2237 stq_p(buf, val);
2238 break;
2239 case 4:
2240 /* 32 bit read access */
2241 error |= io_mem_read(mr, addr1, &val, 4);
2242 stl_p(buf, val);
2243 break;
2244 case 2:
2245 /* 16 bit read access */
2246 error |= io_mem_read(mr, addr1, &val, 2);
2247 stw_p(buf, val);
2248 break;
2249 case 1:
2250 /* 8 bit read access */
2251 error |= io_mem_read(mr, addr1, &val, 1);
2252 stb_p(buf, val);
2253 break;
2254 default:
2255 abort();
2257 } else {
2258 /* RAM case */
2259 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2260 memcpy(buf, ptr, l);
2263 len -= l;
2264 buf += l;
2265 addr += l;
2268 return error;
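/*
 * Usage sketch (illustrative only, not from the original file): a caller that
 * wants to copy a guest-physical buffer into host memory can go through
 * address_space_rw() or the read/write wrappers defined below.  "GUEST_PADDR"
 * is a placeholder address.
 *
 *     uint8_t buf[64];
 *     bool err = address_space_rw(&address_space_memory, GUEST_PADDR,
 *                                 buf, sizeof(buf), false);  // false = read
 *     if (err) {
 *         // at least one byte hit an unassigned or failing region
 *     }
 */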
2271 bool address_space_write(AddressSpace *as, hwaddr addr,
2272 const uint8_t *buf, int len)
2274 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2277 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2279 return address_space_rw(as, addr, buf, len, false);
2283 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2284 int len, int is_write)
2286 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2289 enum write_rom_type {
2290 WRITE_DATA,
2291 FLUSH_CACHE,
2294 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2295 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2297 hwaddr l;
2298 uint8_t *ptr;
2299 hwaddr addr1;
2300 MemoryRegion *mr;
2302 while (len > 0) {
2303 l = len;
2304 mr = address_space_translate(as, addr, &addr1, &l, true);
2306 if (!(memory_region_is_ram(mr) ||
2307 memory_region_is_romd(mr))) {
2308 /* do nothing */
2309 } else {
2310 addr1 += memory_region_get_ram_addr(mr);
2311 /* ROM/RAM case */
2312 ptr = qemu_get_ram_ptr(addr1);
2313 switch (type) {
2314 case WRITE_DATA:
2315 memcpy(ptr, buf, l);
2316 invalidate_and_set_dirty(addr1, l);
2317 break;
2318 case FLUSH_CACHE:
2319 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2320 break;
2323 len -= l;
2324 buf += l;
2325 addr += l;
2329 /* used for ROM loading: can write to RAM and ROM */
2330 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2331 const uint8_t *buf, int len)
2333 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2336 void cpu_flush_icache_range(hwaddr start, int len)
2339 * This function should do the same thing as an icache flush that was
2340 * triggered from within the guest. For TCG we are always cache coherent,
2341 * so there is no need to flush anything. For KVM / Xen we need to flush
2342 * the host's instruction cache at least.
2344 if (tcg_enabled()) {
2345 return;
2348 cpu_physical_memory_write_rom_internal(&address_space_memory,
2349 start, NULL, len, FLUSH_CACHE);
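/*
 * Illustrative sequence (an assumption, not taken from the original source):
 * a ROM loader typically writes the image with
 * cpu_physical_memory_write_rom() and, when the bytes may be executed by a
 * hardware-virtualized guest, follows up with cpu_flush_icache_range() so
 * the host instruction cache is made coherent.  "rom_base", "image" and
 * "image_size" are placeholder names.
 *
 *     cpu_physical_memory_write_rom(&address_space_memory, rom_base,
 *                                   image, image_size);
 *     cpu_flush_icache_range(rom_base, image_size);
 */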
2352 typedef struct {
2353 MemoryRegion *mr;
2354 void *buffer;
2355 hwaddr addr;
2356 hwaddr len;
2357 } BounceBuffer;
2359 static BounceBuffer bounce;
2361 typedef struct MapClient {
2362 void *opaque;
2363 void (*callback)(void *opaque);
2364 QLIST_ENTRY(MapClient) link;
2365 } MapClient;
2367 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2368 = QLIST_HEAD_INITIALIZER(map_client_list);
2370 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2372 MapClient *client = g_malloc(sizeof(*client));
2374 client->opaque = opaque;
2375 client->callback = callback;
2376 QLIST_INSERT_HEAD(&map_client_list, client, link);
2377 return client;
2380 static void cpu_unregister_map_client(void *_client)
2382 MapClient *client = (MapClient *)_client;
2384 QLIST_REMOVE(client, link);
2385 g_free(client);
2388 static void cpu_notify_map_clients(void)
2390 MapClient *client;
2392 while (!QLIST_EMPTY(&map_client_list)) {
2393 client = QLIST_FIRST(&map_client_list);
2394 client->callback(client->opaque);
2395 cpu_unregister_map_client(client);
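/*
 * Sketch of the retry protocol (illustrative, not part of the original code):
 * when address_space_map() below fails because the single bounce buffer is
 * busy, a caller may register a callback and retry once it fires.
 * "my_retry_cb" and "my_state" are hypothetical names.
 *
 *     static void my_retry_cb(void *opaque)
 *     {
 *         // the bounce buffer has been released; try address_space_map() again
 *     }
 *
 *     void *token = cpu_register_map_client(my_state, my_retry_cb);
 *
 * The callback is invoked, and the client unregistered, from
 * cpu_notify_map_clients() when the bounce buffer is freed in
 * address_space_unmap().
 */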
2399 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2401 MemoryRegion *mr;
2402 hwaddr l, xlat;
2404 while (len > 0) {
2405 l = len;
2406 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2407 if (!memory_access_is_direct(mr, is_write)) {
2408 l = memory_access_size(mr, l, addr);
2409 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2410 return false;
2414 len -= l;
2415 addr += l;
2417 return true;
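/*
 * Illustrative check (not from the original source): device emulation code
 * can probe whether a DMA transfer would succeed before performing it.
 * "dma_addr" and "dma_len" are placeholder names.
 *
 *     if (!address_space_access_valid(&address_space_memory, dma_addr,
 *                                     dma_len, true)) {
 *         // report a DMA error to the guest instead of writing
 *     }
 */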
2420 /* Map a physical memory region into a host virtual address.
2421 * May map a subset of the requested range, given by and returned in *plen.
2422 * May return NULL if resources needed to perform the mapping are exhausted.
2423 * Use only for reads OR writes - not for read-modify-write operations.
2424 * Use cpu_register_map_client() to know when retrying the map operation is
2425 * likely to succeed.
2427 void *address_space_map(AddressSpace *as,
2428 hwaddr addr,
2429 hwaddr *plen,
2430 bool is_write)
2432 hwaddr len = *plen;
2433 hwaddr done = 0;
2434 hwaddr l, xlat, base;
2435 MemoryRegion *mr, *this_mr;
2436 ram_addr_t raddr;
2438 if (len == 0) {
2439 return NULL;
2442 l = len;
2443 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2444 if (!memory_access_is_direct(mr, is_write)) {
2445 if (bounce.buffer) {
2446 return NULL;
2448 /* Avoid unbounded allocations */
2449 l = MIN(l, TARGET_PAGE_SIZE);
2450 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2451 bounce.addr = addr;
2452 bounce.len = l;
2454 memory_region_ref(mr);
2455 bounce.mr = mr;
2456 if (!is_write) {
2457 address_space_read(as, addr, bounce.buffer, l);
2460 *plen = l;
2461 return bounce.buffer;
2464 base = xlat;
2465 raddr = memory_region_get_ram_addr(mr);
2467 for (;;) {
2468 len -= l;
2469 addr += l;
2470 done += l;
2471 if (len == 0) {
2472 break;
2475 l = len;
2476 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2477 if (this_mr != mr || xlat != base + done) {
2478 break;
2482 memory_region_ref(mr);
2483 *plen = done;
2484 return qemu_ram_ptr_length(raddr + base, plen);
2487 /* Unmaps a memory region previously mapped by address_space_map().
2488 * Will also mark the memory as dirty if is_write == 1. access_len gives
2489 * the amount of memory that was actually read or written by the caller.
2491 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2492 int is_write, hwaddr access_len)
2494 if (buffer != bounce.buffer) {
2495 MemoryRegion *mr;
2496 ram_addr_t addr1;
2498 mr = qemu_ram_addr_from_host(buffer, &addr1);
2499 assert(mr != NULL);
2500 if (is_write) {
2501 invalidate_and_set_dirty(addr1, access_len);
2503 if (xen_enabled()) {
2504 xen_invalidate_map_cache_entry(buffer);
2506 memory_region_unref(mr);
2507 return;
2509 if (is_write) {
2510 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2512 qemu_vfree(bounce.buffer);
2513 bounce.buffer = NULL;
2514 memory_region_unref(bounce.mr);
2515 cpu_notify_map_clients();
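/*
 * Combined usage sketch (illustrative, not part of the original file): the
 * usual map/unmap pairing for a zero-copy read of guest memory.  "paddr" is
 * a placeholder guest-physical address.
 *
 *     hwaddr len = 4096;
 *     void *p = address_space_map(&address_space_memory, paddr, &len, false);
 *     if (p) {
 *         // ... read up to 'len' bytes directly through 'p' ...
 *         address_space_unmap(&address_space_memory, p, len, false, len);
 *     } else {
 *         // resources exhausted; see cpu_register_map_client() above
 *     }
 */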
2518 void *cpu_physical_memory_map(hwaddr addr,
2519 hwaddr *plen,
2520 int is_write)
2522 return address_space_map(&address_space_memory, addr, plen, is_write);
2525 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2526 int is_write, hwaddr access_len)
2528 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2531 /* warning: addr must be aligned */
2532 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2533 enum device_endian endian)
2535 uint8_t *ptr;
2536 uint64_t val;
2537 MemoryRegion *mr;
2538 hwaddr l = 4;
2539 hwaddr addr1;
2541 mr = address_space_translate(as, addr, &addr1, &l, false);
2542 if (l < 4 || !memory_access_is_direct(mr, false)) {
2543 /* I/O case */
2544 io_mem_read(mr, addr1, &val, 4);
2545 #if defined(TARGET_WORDS_BIGENDIAN)
2546 if (endian == DEVICE_LITTLE_ENDIAN) {
2547 val = bswap32(val);
2549 #else
2550 if (endian == DEVICE_BIG_ENDIAN) {
2551 val = bswap32(val);
2553 #endif
2554 } else {
2555 /* RAM case */
2556 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2557 & TARGET_PAGE_MASK)
2558 + addr1);
2559 switch (endian) {
2560 case DEVICE_LITTLE_ENDIAN:
2561 val = ldl_le_p(ptr);
2562 break;
2563 case DEVICE_BIG_ENDIAN:
2564 val = ldl_be_p(ptr);
2565 break;
2566 default:
2567 val = ldl_p(ptr);
2568 break;
2571 return val;
2574 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2576 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2579 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2581 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2584 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2586 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
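/*
 * Note on the ld*_phys helpers (added for clarity, not in the original):
 * the _le/_be variants fix the device endianness while the plain variant
 * uses the target's native byte order.  For example, reading a 32-bit
 * little-endian register image at a placeholder address "reg_paddr":
 *
 *     uint32_t v = ldl_le_phys(&address_space_memory, reg_paddr);
 */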
2589 /* warning: addr must be aligned */
2590 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2591 enum device_endian endian)
2593 uint8_t *ptr;
2594 uint64_t val;
2595 MemoryRegion *mr;
2596 hwaddr l = 8;
2597 hwaddr addr1;
2599 mr = address_space_translate(as, addr, &addr1, &l,
2600 false);
2601 if (l < 8 || !memory_access_is_direct(mr, false)) {
2602 /* I/O case */
2603 io_mem_read(mr, addr1, &val, 8);
2604 #if defined(TARGET_WORDS_BIGENDIAN)
2605 if (endian == DEVICE_LITTLE_ENDIAN) {
2606 val = bswap64(val);
2608 #else
2609 if (endian == DEVICE_BIG_ENDIAN) {
2610 val = bswap64(val);
2612 #endif
2613 } else {
2614 /* RAM case */
2615 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2616 & TARGET_PAGE_MASK)
2617 + addr1);
2618 switch (endian) {
2619 case DEVICE_LITTLE_ENDIAN:
2620 val = ldq_le_p(ptr);
2621 break;
2622 case DEVICE_BIG_ENDIAN:
2623 val = ldq_be_p(ptr);
2624 break;
2625 default:
2626 val = ldq_p(ptr);
2627 break;
2630 return val;
2633 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2635 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2638 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2640 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2643 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2645 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2648 /* XXX: optimize */
2649 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2651 uint8_t val;
2652 address_space_rw(as, addr, &val, 1, 0);
2653 return val;
2656 /* warning: addr must be aligned */
2657 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2658 enum device_endian endian)
2660 uint8_t *ptr;
2661 uint64_t val;
2662 MemoryRegion *mr;
2663 hwaddr l = 2;
2664 hwaddr addr1;
2666 mr = address_space_translate(as, addr, &addr1, &l,
2667 false);
2668 if (l < 2 || !memory_access_is_direct(mr, false)) {
2669 /* I/O case */
2670 io_mem_read(mr, addr1, &val, 2);
2671 #if defined(TARGET_WORDS_BIGENDIAN)
2672 if (endian == DEVICE_LITTLE_ENDIAN) {
2673 val = bswap16(val);
2675 #else
2676 if (endian == DEVICE_BIG_ENDIAN) {
2677 val = bswap16(val);
2679 #endif
2680 } else {
2681 /* RAM case */
2682 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2683 & TARGET_PAGE_MASK)
2684 + addr1);
2685 switch (endian) {
2686 case DEVICE_LITTLE_ENDIAN:
2687 val = lduw_le_p(ptr);
2688 break;
2689 case DEVICE_BIG_ENDIAN:
2690 val = lduw_be_p(ptr);
2691 break;
2692 default:
2693 val = lduw_p(ptr);
2694 break;
2697 return val;
2700 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2702 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2705 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2707 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2710 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2712 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2715 /* warning: addr must be aligned. The RAM page is not marked as dirty
2716 and the code inside is not invalidated. It is useful if the dirty
2717 bits are used to track modified PTEs */
2718 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2720 uint8_t *ptr;
2721 MemoryRegion *mr;
2722 hwaddr l = 4;
2723 hwaddr addr1;
2725 mr = address_space_translate(as, addr, &addr1, &l,
2726 true);
2727 if (l < 4 || !memory_access_is_direct(mr, true)) {
2728 io_mem_write(mr, addr1, val, 4);
2729 } else {
2730 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2731 ptr = qemu_get_ram_ptr(addr1);
2732 stl_p(ptr, val);
2734 if (unlikely(in_migration)) {
2735 if (cpu_physical_memory_is_clean(addr1)) {
2736 /* invalidate code */
2737 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2738 /* set dirty bit */
2739 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
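/*
 * Clarifying note (an assumption based on the comment above, not original
 * text): stl_phys_notdirty() suits target MMU code that rewrites page table
 * entries, for instance to set accessed/dirty bits, without the write itself
 * flipping the page's dirty/code-invalidation state.  A typical caller might
 * look like the following, where "cpu", "pte_paddr", "pte" and
 * "PG_DIRTY_MASK" stand in for the target's own names:
 *
 *     stl_phys_notdirty(cpu->as, pte_paddr, pte | PG_DIRTY_MASK);
 */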
2745 /* warning: addr must be aligned */
2746 static inline void stl_phys_internal(AddressSpace *as,
2747 hwaddr addr, uint32_t val,
2748 enum device_endian endian)
2750 uint8_t *ptr;
2751 MemoryRegion *mr;
2752 hwaddr l = 4;
2753 hwaddr addr1;
2755 mr = address_space_translate(as, addr, &addr1, &l,
2756 true);
2757 if (l < 4 || !memory_access_is_direct(mr, true)) {
2758 #if defined(TARGET_WORDS_BIGENDIAN)
2759 if (endian == DEVICE_LITTLE_ENDIAN) {
2760 val = bswap32(val);
2762 #else
2763 if (endian == DEVICE_BIG_ENDIAN) {
2764 val = bswap32(val);
2766 #endif
2767 io_mem_write(mr, addr1, val, 4);
2768 } else {
2769 /* RAM case */
2770 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2771 ptr = qemu_get_ram_ptr(addr1);
2772 switch (endian) {
2773 case DEVICE_LITTLE_ENDIAN:
2774 stl_le_p(ptr, val);
2775 break;
2776 case DEVICE_BIG_ENDIAN:
2777 stl_be_p(ptr, val);
2778 break;
2779 default:
2780 stl_p(ptr, val);
2781 break;
2783 invalidate_and_set_dirty(addr1, 4);
2787 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2789 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2792 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2794 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2797 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2799 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
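/*
 * Illustrative counterpart to the loads above (not part of the original
 * source): stl_phys() and its _le/_be variants perform a single, possibly
 * byte-swapped, 32-bit store to a guest-physical address.  "mmio_paddr" is
 * a placeholder.
 *
 *     stl_be_phys(&address_space_memory, mmio_paddr, 0xdeadbeef);
 */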
2802 /* XXX: optimize */
2803 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2805 uint8_t v = val;
2806 address_space_rw(as, addr, &v, 1, 1);
2809 /* warning: addr must be aligned */
2810 static inline void stw_phys_internal(AddressSpace *as,
2811 hwaddr addr, uint32_t val,
2812 enum device_endian endian)
2814 uint8_t *ptr;
2815 MemoryRegion *mr;
2816 hwaddr l = 2;
2817 hwaddr addr1;
2819 mr = address_space_translate(as, addr, &addr1, &l, true);
2820 if (l < 2 || !memory_access_is_direct(mr, true)) {
2821 #if defined(TARGET_WORDS_BIGENDIAN)
2822 if (endian == DEVICE_LITTLE_ENDIAN) {
2823 val = bswap16(val);
2825 #else
2826 if (endian == DEVICE_BIG_ENDIAN) {
2827 val = bswap16(val);
2829 #endif
2830 io_mem_write(mr, addr1, val, 2);
2831 } else {
2832 /* RAM case */
2833 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2834 ptr = qemu_get_ram_ptr(addr1);
2835 switch (endian) {
2836 case DEVICE_LITTLE_ENDIAN:
2837 stw_le_p(ptr, val);
2838 break;
2839 case DEVICE_BIG_ENDIAN:
2840 stw_be_p(ptr, val);
2841 break;
2842 default:
2843 stw_p(ptr, val);
2844 break;
2846 invalidate_and_set_dirty(addr1, 2);
2850 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2852 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2855 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2857 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2860 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2862 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2865 /* XXX: optimize */
2866 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2868 val = tswap64(val);
2869 address_space_rw(as, addr, (void *) &val, 8, 1);
2872 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2874 val = cpu_to_le64(val);
2875 address_space_rw(as, addr, (void *) &val, 8, 1);
2878 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2880 val = cpu_to_be64(val);
2881 address_space_rw(as, addr, (void *) &val, 8, 1);
2884 /* virtual memory access for debug (includes writing to ROM) */
2885 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2886 uint8_t *buf, int len, int is_write)
2888 int l;
2889 hwaddr phys_addr;
2890 target_ulong page;
2892 while (len > 0) {
2893 page = addr & TARGET_PAGE_MASK;
2894 phys_addr = cpu_get_phys_page_debug(cpu, page);
2895 /* if no physical page mapped, return an error */
2896 if (phys_addr == -1)
2897 return -1;
2898 l = (page + TARGET_PAGE_SIZE) - addr;
2899 if (l > len)
2900 l = len;
2901 phys_addr += (addr & ~TARGET_PAGE_MASK);
2902 if (is_write) {
2903 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2904 } else {
2905 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2907 len -= l;
2908 buf += l;
2909 addr += l;
2911 return 0;
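/*
 * Usage sketch (illustrative, not from the original file): this is the entry
 * point gdbstub-style debug paths use to read or write the guest virtual
 * memory of a stopped vCPU, with is_write selecting the direction.
 * "guest_pc" is a placeholder virtual address.
 *
 *     uint8_t insn[4];
 *     if (cpu_memory_rw_debug(cpu, guest_pc, insn, sizeof(insn), 0) < 0) {
 *         // no physical page mapped at guest_pc
 *     }
 */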
2913 #endif
2916 * A helper function for the _utterly broken_ virtio device model to find out if
2917 * it's running on a big endian machine. Don't do this at home kids!
2919 bool target_words_bigendian(void);
2920 bool target_words_bigendian(void)
2922 #if defined(TARGET_WORDS_BIGENDIAN)
2923 return true;
2924 #else
2925 return false;
2926 #endif
2929 #ifndef CONFIG_USER_ONLY
2930 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2932 MemoryRegion *mr;
2933 hwaddr l = 1;
2935 mr = address_space_translate(&address_space_memory,
2936 phys_addr, &phys_addr, &l, false);
2938 return !(memory_region_is_ram(mr) ||
2939 memory_region_is_romd(mr));
2942 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2944 RAMBlock *block;
2946 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2947 func(block->host, block->offset, block->used_length, opaque);
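/*
 * Minimal callback sketch (illustrative; the argument order matches the call
 * above, while the int return type is an assumption about the
 * RAMBlockIterFunc typedef declared in the headers):
 *
 *     static int sum_ram_cb(void *host, ram_addr_t offset,
 *                           ram_addr_t length, void *opaque)
 *     {
 *         *(uint64_t *)opaque += length;    // accumulate used RAM
 *         return 0;
 *     }
 *
 *     uint64_t total = 0;
 *     qemu_ram_foreach_block(sum_ram_cb, &total);
 */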
2950 #endif