qga: introduce three guest memory block commands with stubs
[qemu.git] / exec.c
blob6dff7bc43a67b836b58fe18136a954310e65bdee
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)
83 #endif
85 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
86 /* current CPU in the current thread. It is only valid inside
87 cpu_exec() */
88 DEFINE_TLS(CPUState *, current_cpu);
89 /* 0 = Do not count executed instructions.
90 1 = Precise instruction counting.
91 2 = Adaptive rate instruction counting. */
92 int use_icount;
94 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One entry of the physical page map, packed into a single 32-bit word. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
105 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
107 /* Size of the L2 (and L3, etc) page tables. */
108 #define ADDR_SPACE_BITS 64
110 #define P_L2_BITS 9
111 #define P_L2_SIZE (1 << P_L2_BITS)
113 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
115 typedef PhysPageEntry Node[P_L2_SIZE];
117 typedef struct PhysPageMap {
118 unsigned sections_nb;
119 unsigned sections_nb_alloc;
120 unsigned nodes_nb;
121 unsigned nodes_nb_alloc;
122 Node *nodes;
123 MemoryRegionSection *sections;
124 } PhysPageMap;
126 struct AddressSpaceDispatch {
127 /* This is a multi-level map on the physical address space.
128 * The bottom level has pointers to MemoryRegionSections.
130 PhysPageEntry phys_map;
131 PhysPageMap map;
132 AddressSpace *as;
135 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
136 typedef struct subpage_t {
137 MemoryRegion iomem;
138 AddressSpace *as;
139 hwaddr base;
140 uint16_t sub_section[TARGET_PAGE_SIZE];
141 } subpage_t;
143 #define PHYS_SECTION_UNASSIGNED 0
144 #define PHYS_SECTION_NOTDIRTY 1
145 #define PHYS_SECTION_ROM 2
146 #define PHYS_SECTION_WATCH 3
148 static void io_mem_init(void);
149 static void memory_map_init(void);
150 static void tcg_commit(MemoryListener *listener);
152 static MemoryRegion io_mem_watch;
153 #endif
155 #if !defined(CONFIG_USER_ONLY)
157 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
159 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
160 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
161 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
162 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
166 static uint32_t phys_map_node_alloc(PhysPageMap *map)
168 unsigned i;
169 uint32_t ret;
171 ret = map->nodes_nb++;
172 assert(ret != PHYS_MAP_NODE_NIL);
173 assert(ret != map->nodes_nb_alloc);
174 for (i = 0; i < P_L2_SIZE; ++i) {
175 map->nodes[ret][i].skip = 1;
176 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
178 return ret;
181 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
182 hwaddr *index, hwaddr *nb, uint16_t leaf,
183 int level)
185 PhysPageEntry *p;
186 int i;
187 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
189 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
190 lp->ptr = phys_map_node_alloc(map);
191 p = map->nodes[lp->ptr];
192 if (level == 0) {
193 for (i = 0; i < P_L2_SIZE; i++) {
194 p[i].skip = 0;
195 p[i].ptr = PHYS_SECTION_UNASSIGNED;
198 } else {
199 p = map->nodes[lp->ptr];
201 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
203 while (*nb && lp < &p[P_L2_SIZE]) {
204 if ((*index & (step - 1)) == 0 && *nb >= step) {
205 lp->skip = 0;
206 lp->ptr = leaf;
207 *index += step;
208 *nb -= step;
209 } else {
210 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
212 ++lp;
216 static void phys_page_set(AddressSpaceDispatch *d,
217 hwaddr index, hwaddr nb,
218 uint16_t leaf)
220 /* Wildly overreserve - it doesn't matter much. */
221 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
223 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
226 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
227 * and update our entry so we can skip it and go directly to the destination.
229 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
231 unsigned valid_ptr = P_L2_SIZE;
232 int valid = 0;
233 PhysPageEntry *p;
234 int i;
236 if (lp->ptr == PHYS_MAP_NODE_NIL) {
237 return;
240 p = nodes[lp->ptr];
241 for (i = 0; i < P_L2_SIZE; i++) {
242 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
243 continue;
246 valid_ptr = i;
247 valid++;
248 if (p[i].skip) {
249 phys_page_compact(&p[i], nodes, compacted);
253 /* We can only compress if there's only one child. */
254 if (valid != 1) {
255 return;
258 assert(valid_ptr < P_L2_SIZE);
260 /* Don't compress if it won't fit in the # of bits we have. */
261 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
262 return;
265 lp->ptr = p[valid_ptr].ptr;
266 if (!p[valid_ptr].skip) {
267 /* If our only child is a leaf, make this a leaf. */
268 /* By design, we should have made this node a leaf to begin with so we
269 * should never reach here.
270 * But since it's so simple to handle this, let's do it just in case we
271 * change this rule.
273 lp->skip = 0;
274 } else {
275 lp->skip += p[valid_ptr].skip;
279 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
281 DECLARE_BITMAP(compacted, nodes_nb);
283 if (d->phys_map.skip) {
284 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
288 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
289 Node *nodes, MemoryRegionSection *sections)
291 PhysPageEntry *p;
292 hwaddr index = addr >> TARGET_PAGE_BITS;
293 int i;
295 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
296 if (lp.ptr == PHYS_MAP_NODE_NIL) {
297 return &sections[PHYS_SECTION_UNASSIGNED];
299 p = nodes[lp.ptr];
300 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
303 if (sections[lp.ptr].size.hi ||
304 range_covers_byte(sections[lp.ptr].offset_within_address_space,
305 sections[lp.ptr].size.lo, addr)) {
306 return &sections[lp.ptr];
307 } else {
308 return &sections[PHYS_SECTION_UNASSIGNED];
312 bool memory_region_is_unassigned(MemoryRegion *mr)
314 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
315 && mr != &io_mem_watch;
318 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
319 hwaddr addr,
320 bool resolve_subpage)
322 MemoryRegionSection *section;
323 subpage_t *subpage;
325 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
326 if (resolve_subpage && section->mr->subpage) {
327 subpage = container_of(section->mr, subpage_t, iomem);
328 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
330 return section;
333 static MemoryRegionSection *
334 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
335 hwaddr *plen, bool resolve_subpage)
337 MemoryRegionSection *section;
338 Int128 diff;
340 section = address_space_lookup_region(d, addr, resolve_subpage);
341 /* Compute offset within MemoryRegionSection */
342 addr -= section->offset_within_address_space;
344 /* Compute offset within MemoryRegion */
345 *xlat = addr + section->offset_within_region;
347 diff = int128_sub(section->mr->size, int128_make64(addr));
348 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
349 return section;
352 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
354 if (memory_region_is_ram(mr)) {
355 return !(is_write && mr->readonly);
357 if (memory_region_is_romd(mr)) {
358 return !is_write;
361 return false;
364 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
365 hwaddr *xlat, hwaddr *plen,
366 bool is_write)
368 IOMMUTLBEntry iotlb;
369 MemoryRegionSection *section;
370 MemoryRegion *mr;
371 hwaddr len = *plen;
373 for (;;) {
374 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
375 mr = section->mr;
377 if (!mr->iommu_ops) {
378 break;
381 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
382 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
383 | (addr & iotlb.addr_mask));
384 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
385 if (!(iotlb.perm & (1 << is_write))) {
386 mr = &io_mem_unassigned;
387 break;
390 as = iotlb.target_as;
393 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
394 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
395 len = MIN(page, len);
398 *plen = len;
399 *xlat = addr;
400 return mr;
403 MemoryRegionSection *
404 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
405 hwaddr *plen)
407 MemoryRegionSection *section;
408 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
410 assert(!section->mr->iommu_ops);
411 return section;
413 #endif
415 void cpu_exec_init_all(void)
417 #if !defined(CONFIG_USER_ONLY)
418 qemu_mutex_init(&ram_list.mutex);
419 memory_map_init();
420 io_mem_init();
421 #endif
424 #if !defined(CONFIG_USER_ONLY)
426 static int cpu_common_post_load(void *opaque, int version_id)
428 CPUState *cpu = opaque;
430 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
431 version_id is increased. */
432 cpu->interrupt_request &= ~0x01;
433 tlb_flush(cpu, 1);
435 return 0;
438 static int cpu_common_pre_load(void *opaque)
440 CPUState *cpu = opaque;
442 cpu->exception_index = -1;
444 return 0;
447 static bool cpu_common_exception_index_needed(void *opaque)
449 CPUState *cpu = opaque;
451 return tcg_enabled() && cpu->exception_index != -1;
454 static const VMStateDescription vmstate_cpu_common_exception_index = {
455 .name = "cpu_common/exception_index",
456 .version_id = 1,
457 .minimum_version_id = 1,
458 .fields = (VMStateField[]) {
459 VMSTATE_INT32(exception_index, CPUState),
460 VMSTATE_END_OF_LIST()
464 const VMStateDescription vmstate_cpu_common = {
465 .name = "cpu_common",
466 .version_id = 1,
467 .minimum_version_id = 1,
468 .pre_load = cpu_common_pre_load,
469 .post_load = cpu_common_post_load,
470 .fields = (VMStateField[]) {
471 VMSTATE_UINT32(halted, CPUState),
472 VMSTATE_UINT32(interrupt_request, CPUState),
473 VMSTATE_END_OF_LIST()
475 .subsections = (VMStateSubsection[]) {
477 .vmsd = &vmstate_cpu_common_exception_index,
478 .needed = cpu_common_exception_index_needed,
479 } , {
480 /* empty */
485 #endif
487 CPUState *qemu_get_cpu(int index)
489 CPUState *cpu;
491 CPU_FOREACH(cpu) {
492 if (cpu->cpu_index == index) {
493 return cpu;
497 return NULL;
500 #if !defined(CONFIG_USER_ONLY)
501 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
503 /* We only support one address space per cpu at the moment. */
504 assert(cpu->as == as);
506 if (cpu->tcg_as_listener) {
507 memory_listener_unregister(cpu->tcg_as_listener);
508 } else {
509 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
511 cpu->tcg_as_listener->commit = tcg_commit;
512 memory_listener_register(cpu->tcg_as_listener, as);
514 #endif
516 void cpu_exec_init(CPUArchState *env)
518 CPUState *cpu = ENV_GET_CPU(env);
519 CPUClass *cc = CPU_GET_CLASS(cpu);
520 CPUState *some_cpu;
521 int cpu_index;
523 #if defined(CONFIG_USER_ONLY)
524 cpu_list_lock();
525 #endif
526 cpu_index = 0;
527 CPU_FOREACH(some_cpu) {
528 cpu_index++;
530 cpu->cpu_index = cpu_index;
531 cpu->numa_node = 0;
532 QTAILQ_INIT(&cpu->breakpoints);
533 QTAILQ_INIT(&cpu->watchpoints);
534 #ifndef CONFIG_USER_ONLY
535 cpu->as = &address_space_memory;
536 cpu->thread_id = qemu_get_thread_id();
537 #endif
538 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
539 #if defined(CONFIG_USER_ONLY)
540 cpu_list_unlock();
541 #endif
542 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
543 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
545 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
546 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
547 cpu_save, cpu_load, env);
548 assert(cc->vmsd == NULL);
549 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
550 #endif
551 if (cc->vmsd != NULL) {
552 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
556 #if defined(CONFIG_USER_ONLY)
557 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
559 tb_invalidate_phys_page_range(pc, pc + 1, 0);
561 #else
562 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
564 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
565 if (phys != -1) {
566 tb_invalidate_phys_addr(cpu->as,
567 phys | (pc & ~TARGET_PAGE_MASK));
570 #endif
572 #if defined(CONFIG_USER_ONLY)
573 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
578 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
579 int flags)
581 return -ENOSYS;
584 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
588 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
589 int flags, CPUWatchpoint **watchpoint)
591 return -ENOSYS;
593 #else
594 /* Add a watchpoint. */
595 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
596 int flags, CPUWatchpoint **watchpoint)
598 CPUWatchpoint *wp;
600 /* forbid ranges which are empty or run off the end of the address space */
601 if (len == 0 || (addr + len - 1) < addr) {
602 error_report("tried to set invalid watchpoint at %"
603 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
604 return -EINVAL;
606 wp = g_malloc(sizeof(*wp));
608 wp->vaddr = addr;
609 wp->len = len;
610 wp->flags = flags;
612 /* keep all GDB-injected watchpoints in front */
613 if (flags & BP_GDB) {
614 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
615 } else {
616 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
619 tlb_flush_page(cpu, addr);
621 if (watchpoint)
622 *watchpoint = wp;
623 return 0;
626 /* Remove a specific watchpoint. */
627 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
628 int flags)
630 CPUWatchpoint *wp;
632 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
633 if (addr == wp->vaddr && len == wp->len
634 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
635 cpu_watchpoint_remove_by_ref(cpu, wp);
636 return 0;
639 return -ENOENT;
642 /* Remove a specific watchpoint by reference. */
643 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
645 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
647 tlb_flush_page(cpu, watchpoint->vaddr);
649 g_free(watchpoint);
652 /* Remove all matching watchpoints. */
653 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
655 CPUWatchpoint *wp, *next;
657 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
658 if (wp->flags & mask) {
659 cpu_watchpoint_remove_by_ref(cpu, wp);
664 /* Return true if this watchpoint address matches the specified
665 * access (ie the address range covered by the watchpoint overlaps
666 * partially or completely with the address range covered by the
667 * access).
669 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
670 vaddr addr,
671 vaddr len)
673 /* We know the lengths are non-zero, but a little caution is
674 * required to avoid errors in the case where the range ends
675 * exactly at the top of the address space and so addr + len
676 * wraps round to zero.
678 vaddr wpend = wp->vaddr + wp->len - 1;
679 vaddr addrend = addr + len - 1;
681 return !(addr > wpend || wp->vaddr > addrend);
684 #endif
686 /* Add a breakpoint. */
687 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
688 CPUBreakpoint **breakpoint)
690 CPUBreakpoint *bp;
692 bp = g_malloc(sizeof(*bp));
694 bp->pc = pc;
695 bp->flags = flags;
697 /* keep all GDB-injected breakpoints in front */
698 if (flags & BP_GDB) {
699 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
700 } else {
701 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
704 breakpoint_invalidate(cpu, pc);
706 if (breakpoint) {
707 *breakpoint = bp;
709 return 0;
712 /* Remove a specific breakpoint. */
713 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
715 CPUBreakpoint *bp;
717 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
718 if (bp->pc == pc && bp->flags == flags) {
719 cpu_breakpoint_remove_by_ref(cpu, bp);
720 return 0;
723 return -ENOENT;
726 /* Remove a specific breakpoint by reference. */
727 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
729 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
731 breakpoint_invalidate(cpu, breakpoint->pc);
733 g_free(breakpoint);
736 /* Remove all matching breakpoints. */
737 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
739 CPUBreakpoint *bp, *next;
741 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
742 if (bp->flags & mask) {
743 cpu_breakpoint_remove_by_ref(cpu, bp);
748 /* enable or disable single step mode. EXCP_DEBUG is returned by the
749 CPU loop after each instruction */
750 void cpu_single_step(CPUState *cpu, int enabled)
752 if (cpu->singlestep_enabled != enabled) {
753 cpu->singlestep_enabled = enabled;
754 if (kvm_enabled()) {
755 kvm_update_guest_debug(cpu, 0);
756 } else {
757 /* must flush all the translated code to avoid inconsistencies */
758 /* XXX: only flush what is necessary */
759 CPUArchState *env = cpu->env_ptr;
760 tb_flush(env);
765 void cpu_abort(CPUState *cpu, const char *fmt, ...)
767 va_list ap;
768 va_list ap2;
770 va_start(ap, fmt);
771 va_copy(ap2, ap);
772 fprintf(stderr, "qemu: fatal: ");
773 vfprintf(stderr, fmt, ap);
774 fprintf(stderr, "\n");
775 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
776 if (qemu_log_enabled()) {
777 qemu_log("qemu: fatal: ");
778 qemu_log_vprintf(fmt, ap2);
779 qemu_log("\n");
780 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
781 qemu_log_flush();
782 qemu_log_close();
784 va_end(ap2);
785 va_end(ap);
786 #if defined(CONFIG_USER_ONLY)
788 struct sigaction act;
789 sigfillset(&act.sa_mask);
790 act.sa_handler = SIG_DFL;
791 sigaction(SIGABRT, &act, NULL);
793 #endif
794 abort();
797 #if !defined(CONFIG_USER_ONLY)
798 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
800 RAMBlock *block;
802 /* The list is protected by the iothread lock here. */
803 block = ram_list.mru_block;
804 if (block && addr - block->offset < block->max_length) {
805 goto found;
807 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
808 if (addr - block->offset < block->max_length) {
809 goto found;
813 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
814 abort();
816 found:
817 ram_list.mru_block = block;
818 return block;
821 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
823 ram_addr_t start1;
824 RAMBlock *block;
825 ram_addr_t end;
827 end = TARGET_PAGE_ALIGN(start + length);
828 start &= TARGET_PAGE_MASK;
830 block = qemu_get_ram_block(start);
831 assert(block == qemu_get_ram_block(end - 1));
832 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
833 cpu_tlb_reset_dirty_all(start1, length);
836 /* Note: start and end must be within the same ram block. */
837 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
838 unsigned client)
840 if (length == 0)
841 return;
842 cpu_physical_memory_clear_dirty_range_type(start, length, client);
844 if (tcg_enabled()) {
845 tlb_reset_dirty_range_all(start, length);
849 static void cpu_physical_memory_set_dirty_tracking(bool enable)
851 in_migration = enable;
854 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
855 MemoryRegionSection *section,
856 target_ulong vaddr,
857 hwaddr paddr, hwaddr xlat,
858 int prot,
859 target_ulong *address)
861 hwaddr iotlb;
862 CPUWatchpoint *wp;
864 if (memory_region_is_ram(section->mr)) {
865 /* Normal RAM. */
866 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
867 + xlat;
868 if (!section->readonly) {
869 iotlb |= PHYS_SECTION_NOTDIRTY;
870 } else {
871 iotlb |= PHYS_SECTION_ROM;
873 } else {
874 iotlb = section - section->address_space->dispatch->map.sections;
875 iotlb += xlat;
878 /* Make accesses to pages with watchpoints go via the
879 watchpoint trap routines. */
880 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
881 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
882 /* Avoid trapping reads of pages with a write breakpoint. */
883 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
884 iotlb = PHYS_SECTION_WATCH + paddr;
885 *address |= TLB_MMIO;
886 break;
891 return iotlb;
893 #endif /* defined(CONFIG_USER_ONLY) */
895 #if !defined(CONFIG_USER_ONLY)
897 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
898 uint16_t section);
899 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
901 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
902 qemu_anon_ram_alloc;
905 * Set a custom physical guest memory alloator.
906 * Accelerators with unusual needs may need this. Hopefully, we can
907 * get rid of it eventually.
909 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
911 phys_mem_alloc = alloc;
914 static uint16_t phys_section_add(PhysPageMap *map,
915 MemoryRegionSection *section)
917 /* The physical section number is ORed with a page-aligned
918 * pointer to produce the iotlb entries. Thus it should
919 * never overflow into the page-aligned value.
921 assert(map->sections_nb < TARGET_PAGE_SIZE);
923 if (map->sections_nb == map->sections_nb_alloc) {
924 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
925 map->sections = g_renew(MemoryRegionSection, map->sections,
926 map->sections_nb_alloc);
928 map->sections[map->sections_nb] = *section;
929 memory_region_ref(section->mr);
930 return map->sections_nb++;
933 static void phys_section_destroy(MemoryRegion *mr)
935 memory_region_unref(mr);
937 if (mr->subpage) {
938 subpage_t *subpage = container_of(mr, subpage_t, iomem);
939 object_unref(OBJECT(&subpage->iomem));
940 g_free(subpage);
944 static void phys_sections_free(PhysPageMap *map)
946 while (map->sections_nb > 0) {
947 MemoryRegionSection *section = &map->sections[--map->sections_nb];
948 phys_section_destroy(section->mr);
950 g_free(map->sections);
951 g_free(map->nodes);
954 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
956 subpage_t *subpage;
957 hwaddr base = section->offset_within_address_space
958 & TARGET_PAGE_MASK;
959 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
960 d->map.nodes, d->map.sections);
961 MemoryRegionSection subsection = {
962 .offset_within_address_space = base,
963 .size = int128_make64(TARGET_PAGE_SIZE),
965 hwaddr start, end;
967 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
969 if (!(existing->mr->subpage)) {
970 subpage = subpage_init(d->as, base);
971 subsection.address_space = d->as;
972 subsection.mr = &subpage->iomem;
973 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
974 phys_section_add(&d->map, &subsection));
975 } else {
976 subpage = container_of(existing->mr, subpage_t, iomem);
978 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
979 end = start + int128_get64(section->size) - 1;
980 subpage_register(subpage, start, end,
981 phys_section_add(&d->map, section));
985 static void register_multipage(AddressSpaceDispatch *d,
986 MemoryRegionSection *section)
988 hwaddr start_addr = section->offset_within_address_space;
989 uint16_t section_index = phys_section_add(&d->map, section);
990 uint64_t num_pages = int128_get64(int128_rshift(section->size,
991 TARGET_PAGE_BITS));
993 assert(num_pages);
994 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
997 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
999 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1000 AddressSpaceDispatch *d = as->next_dispatch;
1001 MemoryRegionSection now = *section, remain = *section;
1002 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1004 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1005 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1006 - now.offset_within_address_space;
1008 now.size = int128_min(int128_make64(left), now.size);
1009 register_subpage(d, &now);
1010 } else {
1011 now.size = int128_zero();
1013 while (int128_ne(remain.size, now.size)) {
1014 remain.size = int128_sub(remain.size, now.size);
1015 remain.offset_within_address_space += int128_get64(now.size);
1016 remain.offset_within_region += int128_get64(now.size);
1017 now = remain;
1018 if (int128_lt(remain.size, page_size)) {
1019 register_subpage(d, &now);
1020 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1021 now.size = page_size;
1022 register_subpage(d, &now);
1023 } else {
1024 now.size = int128_and(now.size, int128_neg(page_size));
1025 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1036 void qemu_mutex_lock_ramlist(void)
1038 qemu_mutex_lock(&ram_list.mutex);
1041 void qemu_mutex_unlock_ramlist(void)
1043 qemu_mutex_unlock(&ram_list.mutex);
1046 #ifdef __linux__
1048 #include <sys/vfs.h>
1050 #define HUGETLBFS_MAGIC 0x958458f6
1052 static long gethugepagesize(const char *path, Error **errp)
1054 struct statfs fs;
1055 int ret;
1057 do {
1058 ret = statfs(path, &fs);
1059 } while (ret != 0 && errno == EINTR);
1061 if (ret != 0) {
1062 error_setg_errno(errp, errno, "failed to get page size of file %s",
1063 path);
1064 return 0;
1067 if (fs.f_type != HUGETLBFS_MAGIC)
1068 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1070 return fs.f_bsize;
1073 static void *file_ram_alloc(RAMBlock *block,
1074 ram_addr_t memory,
1075 const char *path,
1076 Error **errp)
1078 char *filename;
1079 char *sanitized_name;
1080 char *c;
1081 void *area = NULL;
1082 int fd;
1083 uint64_t hpagesize;
1084 Error *local_err = NULL;
1086 hpagesize = gethugepagesize(path, &local_err);
1087 if (local_err) {
1088 error_propagate(errp, local_err);
1089 goto error;
1091 block->mr->align = hpagesize;
1093 if (memory < hpagesize) {
1094 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1095 "or larger than huge page size 0x%" PRIx64,
1096 memory, hpagesize);
1097 goto error;
1100 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1101 error_setg(errp,
1102 "host lacks kvm mmu notifiers, -mem-path unsupported");
1103 goto error;
1106 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1107 sanitized_name = g_strdup(memory_region_name(block->mr));
1108 for (c = sanitized_name; *c != '\0'; c++) {
1109 if (*c == '/')
1110 *c = '_';
1113 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1114 sanitized_name);
1115 g_free(sanitized_name);
1117 fd = mkstemp(filename);
1118 if (fd < 0) {
1119 error_setg_errno(errp, errno,
1120 "unable to create backing store for hugepages");
1121 g_free(filename);
1122 goto error;
1124 unlink(filename);
1125 g_free(filename);
1127 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1130 * ftruncate is not supported by hugetlbfs in older
1131 * hosts, so don't bother bailing out on errors.
1132 * If anything goes wrong with it under other filesystems,
1133 * mmap will fail.
1135 if (ftruncate(fd, memory)) {
1136 perror("ftruncate");
1139 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1140 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1141 fd, 0);
1142 if (area == MAP_FAILED) {
1143 error_setg_errno(errp, errno,
1144 "unable to map backing store for hugepages");
1145 close(fd);
1146 goto error;
1149 if (mem_prealloc) {
1150 os_mem_prealloc(fd, area, memory);
1153 block->fd = fd;
1154 return area;
1156 error:
1157 if (mem_prealloc) {
1158 error_report("%s\n", error_get_pretty(*errp));
1159 exit(1);
1161 return NULL;
1163 #endif
1165 static ram_addr_t find_ram_offset(ram_addr_t size)
1167 RAMBlock *block, *next_block;
1168 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1170 assert(size != 0); /* it would hand out same offset multiple times */
1172 if (QTAILQ_EMPTY(&ram_list.blocks))
1173 return 0;
1175 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1176 ram_addr_t end, next = RAM_ADDR_MAX;
1178 end = block->offset + block->max_length;
1180 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1181 if (next_block->offset >= end) {
1182 next = MIN(next, next_block->offset);
1185 if (next - end >= size && next - end < mingap) {
1186 offset = end;
1187 mingap = next - end;
1191 if (offset == RAM_ADDR_MAX) {
1192 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1193 (uint64_t)size);
1194 abort();
1197 return offset;
1200 ram_addr_t last_ram_offset(void)
1202 RAMBlock *block;
1203 ram_addr_t last = 0;
1205 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1206 last = MAX(last, block->offset + block->max_length);
1208 return last;
1211 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1213 int ret;
1215 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1216 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1217 "dump-guest-core", true)) {
1218 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1219 if (ret) {
1220 perror("qemu_madvise");
1221 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1222 "but dump_guest_core=off specified\n");
1227 static RAMBlock *find_ram_block(ram_addr_t addr)
1229 RAMBlock *block;
1231 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1232 if (block->offset == addr) {
1233 return block;
1237 return NULL;
1240 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1242 RAMBlock *new_block = find_ram_block(addr);
1243 RAMBlock *block;
1245 assert(new_block);
1246 assert(!new_block->idstr[0]);
1248 if (dev) {
1249 char *id = qdev_get_dev_path(dev);
1250 if (id) {
1251 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1252 g_free(id);
1255 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1257 /* This assumes the iothread lock is taken here too. */
1258 qemu_mutex_lock_ramlist();
1259 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1260 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1261 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1262 new_block->idstr);
1263 abort();
1266 qemu_mutex_unlock_ramlist();
1269 void qemu_ram_unset_idstr(ram_addr_t addr)
1271 RAMBlock *block = find_ram_block(addr);
1273 if (block) {
1274 memset(block->idstr, 0, sizeof(block->idstr));
1278 static int memory_try_enable_merging(void *addr, size_t len)
1280 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1281 /* disabled by the user */
1282 return 0;
1285 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1288 /* Only legal before guest might have detected the memory size: e.g. on
1289 * incoming migration, or right after reset.
1291 * As memory core doesn't know how is memory accessed, it is up to
1292 * resize callback to update device state and/or add assertions to detect
1293 * misuse, if necessary.
1295 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1297 RAMBlock *block = find_ram_block(base);
1299 assert(block);
1301 if (block->used_length == newsize) {
1302 return 0;
1305 if (!(block->flags & RAM_RESIZEABLE)) {
1306 error_setg_errno(errp, EINVAL,
1307 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1308 " in != 0x" RAM_ADDR_FMT, block->idstr,
1309 newsize, block->used_length);
1310 return -EINVAL;
1313 if (block->max_length < newsize) {
1314 error_setg_errno(errp, EINVAL,
1315 "Length too large: %s: 0x" RAM_ADDR_FMT
1316 " > 0x" RAM_ADDR_FMT, block->idstr,
1317 newsize, block->max_length);
1318 return -EINVAL;
1321 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1322 block->used_length = newsize;
1323 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1324 memory_region_set_size(block->mr, newsize);
1325 if (block->resized) {
1326 block->resized(block->idstr, newsize, block->host);
1328 return 0;
/* Register @new_block in ram_list: pick an offset for it, allocate host
 * memory if the block has none, insert it into the size-sorted list, grow
 * the dirty bitmaps if the RAM address space grew, and mark the used part
 * of the block dirty.
 * Returns the offset assigned to the block, or -1 with @errp set when the
 * host memory allocation fails. */
1331 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1333 RAMBlock *block;
1334 ram_addr_t old_ram_size, new_ram_size;
/* Page count before the insertion, used below to size the dirty bitmaps. */
1336 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1338 /* This assumes the iothread lock is taken here too. */
1339 qemu_mutex_lock_ramlist();
1340 new_block->offset = find_ram_offset(new_block->max_length);
/* A block that already has a host pointer needs no allocation here. */
1342 if (!new_block->host) {
1343 if (xen_enabled()) {
1344 xen_ram_alloc(new_block->offset, new_block->max_length,
1345 new_block->mr);
1346 } else {
1347 new_block->host = phys_mem_alloc(new_block->max_length,
1348 &new_block->mr->align);
1349 if (!new_block->host) {
1350 error_setg_errno(errp, errno,
1351 "cannot set up guest memory '%s'",
1352 memory_region_name(new_block->mr));
/* Error path: release the list lock before returning. */
1353 qemu_mutex_unlock_ramlist();
1354 return -1;
1356 memory_try_enable_merging(new_block->host, new_block->max_length);
1360 /* Keep the list sorted from biggest to smallest block. */
1361 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1362 if (block->max_length < new_block->max_length) {
1363 break;
/* block is NULL here when no smaller block was found. */
1366 if (block) {
1367 QTAILQ_INSERT_BEFORE(block, new_block, next);
1368 } else {
1369 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
/* The cached most-recently-used block is reset after the list changes. */
1371 ram_list.mru_block = NULL;
1373 ram_list.version++;
1374 qemu_mutex_unlock_ramlist();
1376 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
/* Extend every dirty bitmap only when the highest RAM offset moved up. */
1378 if (new_ram_size > old_ram_size) {
1379 int i;
1380 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1381 ram_list.dirty_memory[i] =
1382 bitmap_zero_extend(ram_list.dirty_memory[i],
1383 old_ram_size, new_ram_size);
1386 cpu_physical_memory_set_dirty_range(new_block->offset,
1387 new_block->used_length);
/* Host-side advice is applied over the block's full max_length. */
1389 if (new_block->host) {
1390 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1391 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1392 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1393 if (kvm_enabled()) {
1394 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1398 return new_block->offset;
1401 #ifdef __linux__
1402 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1403 bool share, const char *mem_path,
1404 Error **errp)
1406 RAMBlock *new_block;
1407 ram_addr_t addr;
1408 Error *local_err = NULL;
1410 if (xen_enabled()) {
1411 error_setg(errp, "-mem-path not supported with Xen");
1412 return -1;
1415 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1417 * file_ram_alloc() needs to allocate just like
1418 * phys_mem_alloc, but we haven't bothered to provide
1419 * a hook there.
1421 error_setg(errp,
1422 "-mem-path not supported with this accelerator");
1423 return -1;
1426 size = TARGET_PAGE_ALIGN(size);
1427 new_block = g_malloc0(sizeof(*new_block));
1428 new_block->mr = mr;
1429 new_block->used_length = size;
1430 new_block->max_length = size;
1431 new_block->flags = share ? RAM_SHARED : 0;
1432 new_block->host = file_ram_alloc(new_block, size,
1433 mem_path, errp);
1434 if (!new_block->host) {
1435 g_free(new_block);
1436 return -1;
1439 addr = ram_block_add(new_block, &local_err);
1440 if (local_err) {
1441 g_free(new_block);
1442 error_propagate(errp, local_err);
1443 return -1;
1445 return addr;
1447 #endif
1449 static
1450 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1451 void (*resized)(const char*,
1452 uint64_t length,
1453 void *host),
1454 void *host, bool resizeable,
1455 MemoryRegion *mr, Error **errp)
1457 RAMBlock *new_block;
1458 ram_addr_t addr;
1459 Error *local_err = NULL;
1461 size = TARGET_PAGE_ALIGN(size);
1462 max_size = TARGET_PAGE_ALIGN(max_size);
1463 new_block = g_malloc0(sizeof(*new_block));
1464 new_block->mr = mr;
1465 new_block->resized = resized;
1466 new_block->used_length = size;
1467 new_block->max_length = max_size;
1468 assert(max_size >= size);
1469 new_block->fd = -1;
1470 new_block->host = host;
1471 if (host) {
1472 new_block->flags |= RAM_PREALLOC;
1474 if (resizeable) {
1475 new_block->flags |= RAM_RESIZEABLE;
1477 addr = ram_block_add(new_block, &local_err);
1478 if (local_err) {
1479 g_free(new_block);
1480 error_propagate(errp, local_err);
1481 return -1;
1483 return addr;
1486 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1487 MemoryRegion *mr, Error **errp)
1489 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1492 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1494 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1497 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1498 void (*resized)(const char*,
1499 uint64_t length,
1500 void *host),
1501 MemoryRegion *mr, Error **errp)
1503 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1506 void qemu_ram_free_from_ptr(ram_addr_t addr)
1508 RAMBlock *block;
1510 /* This assumes the iothread lock is taken here too. */
1511 qemu_mutex_lock_ramlist();
1512 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1513 if (addr == block->offset) {
1514 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1515 ram_list.mru_block = NULL;
1516 ram_list.version++;
1517 g_free(block);
1518 break;
1521 qemu_mutex_unlock_ramlist();
1524 void qemu_ram_free(ram_addr_t addr)
1526 RAMBlock *block;
1528 /* This assumes the iothread lock is taken here too. */
1529 qemu_mutex_lock_ramlist();
1530 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1531 if (addr == block->offset) {
1532 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1533 ram_list.mru_block = NULL;
1534 ram_list.version++;
1535 if (block->flags & RAM_PREALLOC) {
1537 } else if (xen_enabled()) {
1538 xen_invalidate_map_cache_entry(block->host);
1539 #ifndef _WIN32
1540 } else if (block->fd >= 0) {
1541 munmap(block->host, block->max_length);
1542 close(block->fd);
1543 #endif
1544 } else {
1545 qemu_anon_ram_free(block->host, block->max_length);
1547 g_free(block);
1548 break;
1551 qemu_mutex_unlock_ramlist();
1555 #ifndef _WIN32
/* Replace the host mapping of @length bytes at RAM offset @addr with a
 * fresh mapping at the same host virtual address.  File-backed blocks are
 * remapped from their fd at the matching file offset, other blocks from
 * anonymous memory.  RAM_PREALLOC blocks are left untouched, Xen aborts,
 * and a failed mmap terminates the process.  Nothing happens when @addr
 * lies in no block. */
1556 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1558 RAMBlock *block;
1559 ram_addr_t offset;
1560 int flags;
1561 void *area, *vaddr;
1563 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
/* Unsigned subtraction: an addr below block->offset wraps to a large value
 * and fails the range test on the next line. */
1564 offset = addr - block->offset;
1565 if (offset < block->max_length) {
1566 vaddr = ramblock_ptr(block, offset);
1567 if (block->flags & RAM_PREALLOC) {
1569 } else if (xen_enabled()) {
1570 abort();
1571 } else {
/* MAP_FIXED requests the new mapping at the old address vaddr. */
1572 flags = MAP_FIXED;
1573 munmap(vaddr, length);
1574 if (block->fd >= 0) {
1575 flags |= (block->flags & RAM_SHARED ?
1576 MAP_SHARED : MAP_PRIVATE);
1577 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1578 flags, block->fd, offset);
1579 } else {
1581 * Remap needs to match alloc. Accelerators that
1582 * set phys_mem_alloc never remap. If they did,
1583 * we'd need a remap hook here.
1585 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1587 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1588 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1589 flags, -1, 0);
/* Also catches MAP_FAILED, which never equals vaddr. */
1591 if (area != vaddr) {
1592 fprintf(stderr, "Could not remap addr: "
1593 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1594 length, addr);
1595 exit(1);
/* Re-apply the merge and dump advice to the new mapping. */
1597 memory_try_enable_merging(vaddr, length);
1598 qemu_ram_setup_dump(vaddr, length);
1600 return;
1604 #endif /* !_WIN32 */
1606 int qemu_get_ram_fd(ram_addr_t addr)
1608 RAMBlock *block = qemu_get_ram_block(addr);
1610 return block->fd;
1613 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1615 RAMBlock *block = qemu_get_ram_block(addr);
1617 return ramblock_ptr(block, 0);
1620 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1621 With the exception of the softmmu code in this file, this should
1622 only be used for local memory (e.g. video ram) that the device owns,
1623 and knows it isn't going to access beyond the end of the block.
1625 It should not be used for general purpose DMA.
1626 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1628 void *qemu_get_ram_ptr(ram_addr_t addr)
1630 RAMBlock *block = qemu_get_ram_block(addr);
1632 if (xen_enabled()) {
1633 /* We need to check if the requested address is in the RAM
1634 * because we don't want to map the entire memory in QEMU.
1635 * In that case just map until the end of the page.
1637 if (block->offset == 0) {
1638 return xen_map_cache(addr, 0, 0);
1639 } else if (block->host == NULL) {
1640 block->host =
1641 xen_map_cache(block->offset, block->max_length, 1);
1644 return ramblock_ptr(block, addr - block->offset);
1647 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1648 * but takes a size argument */
1649 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1651 if (*size == 0) {
1652 return NULL;
1654 if (xen_enabled()) {
1655 return xen_map_cache(addr, *size, 1);
1656 } else {
1657 RAMBlock *block;
1659 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1660 if (addr - block->offset < block->max_length) {
1661 if (addr - block->offset + *size > block->max_length)
1662 *size = block->max_length - addr + block->offset;
1663 return ramblock_ptr(block, addr - block->offset);
1667 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1668 abort();
1672 /* Some of the softmmu routines need to translate from a host pointer
1673 (typically a TLB entry) back to a ram offset. */
1674 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1676 RAMBlock *block;
1677 uint8_t *host = ptr;
1679 if (xen_enabled()) {
1680 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1681 return qemu_get_ram_block(*ram_addr)->mr;
1684 block = ram_list.mru_block;
1685 if (block && block->host && host - block->host < block->max_length) {
1686 goto found;
1689 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1690 /* This case append when the block is not mapped. */
1691 if (block->host == NULL) {
1692 continue;
1694 if (host - block->host < block->max_length) {
1695 goto found;
1699 return NULL;
1701 found:
1702 *ram_addr = block->offset + (host - block->host);
1703 return block->mr;
1706 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1707 uint64_t val, unsigned size)
1709 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1710 tb_invalidate_phys_page_fast(ram_addr, size);
1712 switch (size) {
1713 case 1:
1714 stb_p(qemu_get_ram_ptr(ram_addr), val);
1715 break;
1716 case 2:
1717 stw_p(qemu_get_ram_ptr(ram_addr), val);
1718 break;
1719 case 4:
1720 stl_p(qemu_get_ram_ptr(ram_addr), val);
1721 break;
1722 default:
1723 abort();
1725 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1726 /* we remove the notdirty callback only if the code has been
1727 flushed */
1728 if (!cpu_physical_memory_is_clean(ram_addr)) {
1729 CPUArchState *env = current_cpu->env_ptr;
1730 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1734 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1735 unsigned size, bool is_write)
1737 return is_write;
1740 static const MemoryRegionOps notdirty_mem_ops = {
1741 .write = notdirty_mem_write,
1742 .valid.accepts = notdirty_mem_accepts,
1743 .endianness = DEVICE_NATIVE_ENDIAN,
1746 /* Generate a debug exception if a watchpoint has been hit. */
/* @offset is the offset within the page of cpu->mem_io_vaddr, @len the
 * access length, and @flags the access kind tested against wp->flags
 * (callers in this file pass BP_MEM_READ or BP_MEM_WRITE). */
1747 static void check_watchpoint(int offset, int len, int flags)
1749 CPUState *cpu = current_cpu;
1750 CPUArchState *env = cpu->env_ptr;
1751 target_ulong pc, cs_base;
1752 target_ulong vaddr;
1753 CPUWatchpoint *wp;
1754 int cpu_flags;
1756 if (cpu->watchpoint_hit) {
1757 /* We re-entered the check after replacing the TB. Now raise
1758 * the debug interrupt so that it will trigger after the
1759 * current instruction. */
1760 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1761 return;
/* Rebuild the accessed virtual address from the page of the current
 * memory I/O access plus the in-page offset. */
1763 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1764 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1765 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1766 && (wp->flags & flags)) {
/* Record on the watchpoint whether a read or a write hit it. */
1767 if (flags == BP_MEM_READ) {
1768 wp->flags |= BP_WATCHPOINT_HIT_READ;
1769 } else {
1770 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1772 wp->hitaddr = vaddr;
/* Only the first matching watchpoint is stored in cpu->watchpoint_hit. */
1773 if (!cpu->watchpoint_hit) {
1774 cpu->watchpoint_hit = wp;
1775 tb_check_watchpoint(cpu);
1776 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1777 cpu->exception_index = EXCP_DEBUG;
1778 cpu_loop_exit(cpu);
1779 } else {
1780 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1781 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1782 cpu_resume_from_signal(cpu, NULL);
1785 } else {
/* No match for this watchpoint: clear its hit flags. */
1786 wp->flags &= ~BP_WATCHPOINT_HIT;
1791 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1792 so these check for a hit then pass through to the normal out-of-line
1793 phys routines. */
1794 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1795 unsigned size)
1797 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1798 switch (size) {
1799 case 1: return ldub_phys(&address_space_memory, addr);
1800 case 2: return lduw_phys(&address_space_memory, addr);
1801 case 4: return ldl_phys(&address_space_memory, addr);
1802 default: abort();
1806 static void watch_mem_write(void *opaque, hwaddr addr,
1807 uint64_t val, unsigned size)
1809 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1810 switch (size) {
1811 case 1:
1812 stb_phys(&address_space_memory, addr, val);
1813 break;
1814 case 2:
1815 stw_phys(&address_space_memory, addr, val);
1816 break;
1817 case 4:
1818 stl_phys(&address_space_memory, addr, val);
1819 break;
1820 default: abort();
1824 static const MemoryRegionOps watch_mem_ops = {
1825 .read = watch_mem_read,
1826 .write = watch_mem_write,
1827 .endianness = DEVICE_NATIVE_ENDIAN,
1830 static uint64_t subpage_read(void *opaque, hwaddr addr,
1831 unsigned len)
1833 subpage_t *subpage = opaque;
1834 uint8_t buf[8];
1836 #if defined(DEBUG_SUBPAGE)
1837 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1838 subpage, len, addr);
1839 #endif
1840 address_space_read(subpage->as, addr + subpage->base, buf, len);
1841 switch (len) {
1842 case 1:
1843 return ldub_p(buf);
1844 case 2:
1845 return lduw_p(buf);
1846 case 4:
1847 return ldl_p(buf);
1848 case 8:
1849 return ldq_p(buf);
1850 default:
1851 abort();
1855 static void subpage_write(void *opaque, hwaddr addr,
1856 uint64_t value, unsigned len)
1858 subpage_t *subpage = opaque;
1859 uint8_t buf[8];
1861 #if defined(DEBUG_SUBPAGE)
1862 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1863 " value %"PRIx64"\n",
1864 __func__, subpage, len, addr, value);
1865 #endif
1866 switch (len) {
1867 case 1:
1868 stb_p(buf, value);
1869 break;
1870 case 2:
1871 stw_p(buf, value);
1872 break;
1873 case 4:
1874 stl_p(buf, value);
1875 break;
1876 case 8:
1877 stq_p(buf, value);
1878 break;
1879 default:
1880 abort();
1882 address_space_write(subpage->as, addr + subpage->base, buf, len);
1885 static bool subpage_accepts(void *opaque, hwaddr addr,
1886 unsigned len, bool is_write)
1888 subpage_t *subpage = opaque;
1889 #if defined(DEBUG_SUBPAGE)
1890 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1891 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1892 #endif
1894 return address_space_access_valid(subpage->as, addr + subpage->base,
1895 len, is_write);
1898 static const MemoryRegionOps subpage_ops = {
1899 .read = subpage_read,
1900 .write = subpage_write,
1901 .impl.min_access_size = 1,
1902 .impl.max_access_size = 8,
1903 .valid.min_access_size = 1,
1904 .valid.max_access_size = 8,
1905 .valid.accepts = subpage_accepts,
1906 .endianness = DEVICE_NATIVE_ENDIAN,
1909 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1910 uint16_t section)
1912 int idx, eidx;
1914 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1915 return -1;
1916 idx = SUBPAGE_IDX(start);
1917 eidx = SUBPAGE_IDX(end);
1918 #if defined(DEBUG_SUBPAGE)
1919 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1920 __func__, mmio, start, end, idx, eidx, section);
1921 #endif
1922 for (; idx <= eidx; idx++) {
1923 mmio->sub_section[idx] = section;
1926 return 0;
1929 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1931 subpage_t *mmio;
1933 mmio = g_malloc0(sizeof(subpage_t));
1935 mmio->as = as;
1936 mmio->base = base;
1937 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1938 NULL, TARGET_PAGE_SIZE);
1939 mmio->iomem.subpage = true;
1940 #if defined(DEBUG_SUBPAGE)
1941 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1942 mmio, base, TARGET_PAGE_SIZE);
1943 #endif
1944 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1946 return mmio;
1949 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1950 MemoryRegion *mr)
1952 assert(as);
1953 MemoryRegionSection section = {
1954 .address_space = as,
1955 .mr = mr,
1956 .offset_within_address_space = 0,
1957 .offset_within_region = 0,
1958 .size = int128_2_64(),
1961 return phys_section_add(map, &section);
1964 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1966 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1969 static void io_mem_init(void)
1971 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1972 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1973 NULL, UINT64_MAX);
1974 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1975 NULL, UINT64_MAX);
1976 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1977 NULL, UINT64_MAX);
1980 static void mem_begin(MemoryListener *listener)
1982 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1983 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1984 uint16_t n;
1986 n = dummy_section(&d->map, as, &io_mem_unassigned);
1987 assert(n == PHYS_SECTION_UNASSIGNED);
1988 n = dummy_section(&d->map, as, &io_mem_notdirty);
1989 assert(n == PHYS_SECTION_NOTDIRTY);
1990 n = dummy_section(&d->map, as, &io_mem_rom);
1991 assert(n == PHYS_SECTION_ROM);
1992 n = dummy_section(&d->map, as, &io_mem_watch);
1993 assert(n == PHYS_SECTION_WATCH);
1995 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1996 d->as = as;
1997 as->next_dispatch = d;
2000 static void mem_commit(MemoryListener *listener)
2002 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2003 AddressSpaceDispatch *cur = as->dispatch;
2004 AddressSpaceDispatch *next = as->next_dispatch;
2006 phys_page_compact_all(next, next->map.nodes_nb);
2008 as->dispatch = next;
2010 if (cur) {
2011 phys_sections_free(&cur->map);
2012 g_free(cur);
2016 static void tcg_commit(MemoryListener *listener)
2018 CPUState *cpu;
2020 /* since each CPU stores ram addresses in its TLB cache, we must
2021 reset the modified entries */
2022 /* XXX: slow ! */
2023 CPU_FOREACH(cpu) {
2024 /* FIXME: Disentangle the cpu.h circular files deps so we can
2025 directly get the right CPU from listener. */
2026 if (cpu->tcg_as_listener != listener) {
2027 continue;
2029 tlb_flush(cpu, 1);
2033 static void core_log_global_start(MemoryListener *listener)
2035 cpu_physical_memory_set_dirty_tracking(true);
2038 static void core_log_global_stop(MemoryListener *listener)
2040 cpu_physical_memory_set_dirty_tracking(false);
2043 static MemoryListener core_memory_listener = {
2044 .log_global_start = core_log_global_start,
2045 .log_global_stop = core_log_global_stop,
2046 .priority = 1,
2049 void address_space_init_dispatch(AddressSpace *as)
2051 as->dispatch = NULL;
2052 as->dispatch_listener = (MemoryListener) {
2053 .begin = mem_begin,
2054 .commit = mem_commit,
2055 .region_add = mem_add,
2056 .region_nop = mem_add,
2057 .priority = 0,
2059 memory_listener_register(&as->dispatch_listener, as);
2062 void address_space_unregister(AddressSpace *as)
2064 memory_listener_unregister(&as->dispatch_listener);
2067 void address_space_destroy_dispatch(AddressSpace *as)
2069 AddressSpaceDispatch *d = as->dispatch;
2071 g_free(d);
2072 as->dispatch = NULL;
2075 static void memory_map_init(void)
2077 system_memory = g_malloc(sizeof(*system_memory));
2079 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2080 address_space_init(&address_space_memory, system_memory, "memory");
2082 system_io = g_malloc(sizeof(*system_io));
2083 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2084 65536);
2085 address_space_init(&address_space_io, system_io, "I/O");
2087 memory_listener_register(&core_memory_listener, &address_space_memory);
2090 MemoryRegion *get_system_memory(void)
2092 return system_memory;
2095 MemoryRegion *get_system_io(void)
2097 return system_io;
2100 #endif /* !defined(CONFIG_USER_ONLY) */
2102 /* physical memory access (slow version, mainly for debug) */
2103 #if defined(CONFIG_USER_ONLY)
2104 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2105 uint8_t *buf, int len, int is_write)
2107 int l, flags;
2108 target_ulong page;
2109 void * p;
2111 while (len > 0) {
2112 page = addr & TARGET_PAGE_MASK;
2113 l = (page + TARGET_PAGE_SIZE) - addr;
2114 if (l > len)
2115 l = len;
2116 flags = page_get_flags(page);
2117 if (!(flags & PAGE_VALID))
2118 return -1;
2119 if (is_write) {
2120 if (!(flags & PAGE_WRITE))
2121 return -1;
2122 /* XXX: this code should not depend on lock_user */
2123 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2124 return -1;
2125 memcpy(p, buf, l);
2126 unlock_user(p, addr, l);
2127 } else {
2128 if (!(flags & PAGE_READ))
2129 return -1;
2130 /* XXX: this code should not depend on lock_user */
2131 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2132 return -1;
2133 memcpy(buf, p, l);
2134 unlock_user(p, addr, 0);
2136 len -= l;
2137 buf += l;
2138 addr += l;
2140 return 0;
2143 #else
2145 static void invalidate_and_set_dirty(hwaddr addr,
2146 hwaddr length)
2148 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2149 tb_invalidate_phys_range(addr, addr + length, 0);
2150 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2152 xen_modified_memory(addr, length);
2155 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2157 unsigned access_size_max = mr->ops->valid.max_access_size;
2159 /* Regions are assumed to support 1-4 byte accesses unless
2160 otherwise specified. */
2161 if (access_size_max == 0) {
2162 access_size_max = 4;
2165 /* Bound the maximum access by the alignment of the address. */
2166 if (!mr->ops->impl.unaligned) {
2167 unsigned align_size_max = addr & -addr;
2168 if (align_size_max != 0 && align_size_max < access_size_max) {
2169 access_size_max = align_size_max;
2173 /* Don't attempt accesses larger than the maximum. */
2174 if (l > access_size_max) {
2175 l = access_size_max;
2177 if (l & (l - 1)) {
2178 l = 1 << (qemu_fls(l) - 1);
2181 return l;
/* Copy @len bytes between @buf and address space @as starting at @addr.
 * @is_write selects the direction (true: @buf -> @as).  The range is
 * processed in pieces: each piece is translated to a memory region and
 * then either copied directly to/from RAM or issued as one 1/2/4/8-byte
 * I/O access.
 * Returns true if any I/O access reported an error, false otherwise. */
2184 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2185 int len, bool is_write)
2187 hwaddr l;
2188 uint8_t *ptr;
2189 uint64_t val;
2190 hwaddr addr1;
2191 MemoryRegion *mr;
2192 bool error = false;
2194 while (len > 0) {
/* l goes in as the remaining length; translation may shrink it. */
2195 l = len;
2196 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2198 if (is_write) {
2199 if (!memory_access_is_direct(mr, is_write)) {
/* I/O write: reduce l to one access size the region can take. */
2200 l = memory_access_size(mr, l, addr1);
2201 /* XXX: could force current_cpu to NULL to avoid
2202 potential bugs */
2203 switch (l) {
2204 case 8:
2205 /* 64 bit write access */
2206 val = ldq_p(buf);
2207 error |= io_mem_write(mr, addr1, val, 8);
2208 break;
2209 case 4:
2210 /* 32 bit write access */
2211 val = ldl_p(buf);
2212 error |= io_mem_write(mr, addr1, val, 4);
2213 break;
2214 case 2:
2215 /* 16 bit write access */
2216 val = lduw_p(buf);
2217 error |= io_mem_write(mr, addr1, val, 2);
2218 break;
2219 case 1:
2220 /* 8 bit write access */
2221 val = ldub_p(buf);
2222 error |= io_mem_write(mr, addr1, val, 1);
2223 break;
2224 default:
2225 abort();
2227 } else {
/* addr1 becomes a ram address: region offset plus the region's ram base. */
2228 addr1 += memory_region_get_ram_addr(mr);
2229 /* RAM case */
2230 ptr = qemu_get_ram_ptr(addr1);
2231 memcpy(ptr, buf, l);
2232 invalidate_and_set_dirty(addr1, l);
2234 } else {
2235 if (!memory_access_is_direct(mr, is_write)) {
2236 /* I/O case */
2237 l = memory_access_size(mr, l, addr1);
2238 switch (l) {
2239 case 8:
2240 /* 64 bit read access */
2241 error |= io_mem_read(mr, addr1, &val, 8);
2242 stq_p(buf, val);
2243 break;
2244 case 4:
2245 /* 32 bit read access */
2246 error |= io_mem_read(mr, addr1, &val, 4);
2247 stl_p(buf, val);
2248 break;
2249 case 2:
2250 /* 16 bit read access */
2251 error |= io_mem_read(mr, addr1, &val, 2);
2252 stw_p(buf, val);
2253 break;
2254 case 1:
2255 /* 8 bit read access */
2256 error |= io_mem_read(mr, addr1, &val, 1);
2257 stb_p(buf, val);
2258 break;
2259 default:
2260 abort();
2262 } else {
2263 /* RAM case */
2264 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2265 memcpy(buf, ptr, l);
/* Advance past the piece just handled. */
2268 len -= l;
2269 buf += l;
2270 addr += l;
2273 return error;
2276 bool address_space_write(AddressSpace *as, hwaddr addr,
2277 const uint8_t *buf, int len)
2279 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2282 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2284 return address_space_rw(as, addr, buf, len, false);
2288 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2289 int len, int is_write)
2291 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host instruction cache over the range */
};
2299 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2300 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2302 hwaddr l;
2303 uint8_t *ptr;
2304 hwaddr addr1;
2305 MemoryRegion *mr;
2307 while (len > 0) {
2308 l = len;
2309 mr = address_space_translate(as, addr, &addr1, &l, true);
2311 if (!(memory_region_is_ram(mr) ||
2312 memory_region_is_romd(mr))) {
2313 /* do nothing */
2314 } else {
2315 addr1 += memory_region_get_ram_addr(mr);
2316 /* ROM/RAM case */
2317 ptr = qemu_get_ram_ptr(addr1);
2318 switch (type) {
2319 case WRITE_DATA:
2320 memcpy(ptr, buf, l);
2321 invalidate_and_set_dirty(addr1, l);
2322 break;
2323 case FLUSH_CACHE:
2324 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2325 break;
2328 len -= l;
2329 buf += l;
2330 addr += l;
2334 /* used for ROM loading : can write in RAM and ROM */
2335 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2336 const uint8_t *buf, int len)
2338 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2341 void cpu_flush_icache_range(hwaddr start, int len)
2344 * This function should do the same thing as an icache flush that was
2345 * triggered from within the guest. For TCG we are always cache coherent,
2346 * so there is no need to flush anything. For KVM / Xen we need to flush
2347 * the host's instruction cache at least.
2349 if (tcg_enabled()) {
2350 return;
2353 cpu_physical_memory_write_rom_internal(&address_space_memory,
2354 start, NULL, len, FLUSH_CACHE);
2357 typedef struct {
2358 MemoryRegion *mr;
2359 void *buffer;
2360 hwaddr addr;
2361 hwaddr len;
2362 } BounceBuffer;
2364 static BounceBuffer bounce;
2366 typedef struct MapClient {
2367 void *opaque;
2368 void (*callback)(void *opaque);
2369 QLIST_ENTRY(MapClient) link;
2370 } MapClient;
2372 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2373 = QLIST_HEAD_INITIALIZER(map_client_list);
2375 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2377 MapClient *client = g_malloc(sizeof(*client));
2379 client->opaque = opaque;
2380 client->callback = callback;
2381 QLIST_INSERT_HEAD(&map_client_list, client, link);
2382 return client;
2385 static void cpu_unregister_map_client(void *_client)
2387 MapClient *client = (MapClient *)_client;
2389 QLIST_REMOVE(client, link);
2390 g_free(client);
2393 static void cpu_notify_map_clients(void)
2395 MapClient *client;
2397 while (!QLIST_EMPTY(&map_client_list)) {
2398 client = QLIST_FIRST(&map_client_list);
2399 client->callback(client->opaque);
2400 cpu_unregister_map_client(client);
2404 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2406 MemoryRegion *mr;
2407 hwaddr l, xlat;
2409 while (len > 0) {
2410 l = len;
2411 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2412 if (!memory_access_is_direct(mr, is_write)) {
2413 l = memory_access_size(mr, l, addr);
2414 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2415 return false;
2419 len -= l;
2420 addr += l;
2422 return true;
2425 /* Map a physical memory region into a host virtual address.
2426 * May map a subset of the requested range, given by and returned in *plen.
2427 * May return NULL if resources needed to perform the mapping are exhausted.
2428 * Use only for reads OR writes - not for read-modify-write operations.
2429 * Use cpu_register_map_client() to know when retrying the map operation is
2430 * likely to succeed.
2432 void *address_space_map(AddressSpace *as,
2433 hwaddr addr,
2434 hwaddr *plen,
2435 bool is_write)
2437 hwaddr len = *plen;
2438 hwaddr done = 0;
2439 hwaddr l, xlat, base;
2440 MemoryRegion *mr, *this_mr;
2441 ram_addr_t raddr;
2443 if (len == 0) {
2444 return NULL;
2447 l = len;
2448 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2449 if (!memory_access_is_direct(mr, is_write)) {
2450 if (bounce.buffer) {
2451 return NULL;
2453 /* Avoid unbounded allocations */
2454 l = MIN(l, TARGET_PAGE_SIZE);
2455 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2456 bounce.addr = addr;
2457 bounce.len = l;
2459 memory_region_ref(mr);
2460 bounce.mr = mr;
2461 if (!is_write) {
2462 address_space_read(as, addr, bounce.buffer, l);
2465 *plen = l;
2466 return bounce.buffer;
2469 base = xlat;
2470 raddr = memory_region_get_ram_addr(mr);
2472 for (;;) {
2473 len -= l;
2474 addr += l;
2475 done += l;
2476 if (len == 0) {
2477 break;
2480 l = len;
2481 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2482 if (this_mr != mr || xlat != base + done) {
2483 break;
2487 memory_region_ref(mr);
2488 *plen = done;
2489 return qemu_ram_ptr_length(raddr + base, plen);
2492 /* Unmaps a memory region previously mapped by address_space_map().
2493 * Will also mark the memory as dirty if is_write == 1. access_len gives
2494 * the amount of memory that was actually read or written by the caller.
2496 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2497 int is_write, hwaddr access_len)
2499 if (buffer != bounce.buffer) {
2500 MemoryRegion *mr;
2501 ram_addr_t addr1;
2503 mr = qemu_ram_addr_from_host(buffer, &addr1);
2504 assert(mr != NULL);
2505 if (is_write) {
2506 invalidate_and_set_dirty(addr1, access_len);
2508 if (xen_enabled()) {
2509 xen_invalidate_map_cache_entry(buffer);
2511 memory_region_unref(mr);
2512 return;
2514 if (is_write) {
2515 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2517 qemu_vfree(bounce.buffer);
2518 bounce.buffer = NULL;
2519 memory_region_unref(bounce.mr);
2520 cpu_notify_map_clients();
2523 void *cpu_physical_memory_map(hwaddr addr,
2524 hwaddr *plen,
2525 int is_write)
2527 return address_space_map(&address_space_memory, addr, plen, is_write);
2530 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2531 int is_write, hwaddr access_len)
2533 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2536 /* warning: addr must be aligned */
2537 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2538 enum device_endian endian)
2540 uint8_t *ptr;
2541 uint64_t val;
2542 MemoryRegion *mr;
2543 hwaddr l = 4;
2544 hwaddr addr1;
2546 mr = address_space_translate(as, addr, &addr1, &l, false);
2547 if (l < 4 || !memory_access_is_direct(mr, false)) {
2548 /* I/O case */
2549 io_mem_read(mr, addr1, &val, 4);
2550 #if defined(TARGET_WORDS_BIGENDIAN)
2551 if (endian == DEVICE_LITTLE_ENDIAN) {
2552 val = bswap32(val);
2554 #else
2555 if (endian == DEVICE_BIG_ENDIAN) {
2556 val = bswap32(val);
2558 #endif
2559 } else {
2560 /* RAM case */
2561 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2562 & TARGET_PAGE_MASK)
2563 + addr1);
2564 switch (endian) {
2565 case DEVICE_LITTLE_ENDIAN:
2566 val = ldl_le_p(ptr);
2567 break;
2568 case DEVICE_BIG_ENDIAN:
2569 val = ldl_be_p(ptr);
2570 break;
2571 default:
2572 val = ldl_p(ptr);
2573 break;
2576 return val;
2579 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2581 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2584 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2586 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2589 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2591 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2594 /* warning: addr must be aligned */
2595 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2596 enum device_endian endian)
2598 uint8_t *ptr;
2599 uint64_t val;
2600 MemoryRegion *mr;
2601 hwaddr l = 8;
2602 hwaddr addr1;
2604 mr = address_space_translate(as, addr, &addr1, &l,
2605 false);
2606 if (l < 8 || !memory_access_is_direct(mr, false)) {
2607 /* I/O case */
2608 io_mem_read(mr, addr1, &val, 8);
2609 #if defined(TARGET_WORDS_BIGENDIAN)
2610 if (endian == DEVICE_LITTLE_ENDIAN) {
2611 val = bswap64(val);
2613 #else
2614 if (endian == DEVICE_BIG_ENDIAN) {
2615 val = bswap64(val);
2617 #endif
2618 } else {
2619 /* RAM case */
2620 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2621 & TARGET_PAGE_MASK)
2622 + addr1);
2623 switch (endian) {
2624 case DEVICE_LITTLE_ENDIAN:
2625 val = ldq_le_p(ptr);
2626 break;
2627 case DEVICE_BIG_ENDIAN:
2628 val = ldq_be_p(ptr);
2629 break;
2630 default:
2631 val = ldq_p(ptr);
2632 break;
2635 return val;
2638 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2640 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2643 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2645 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2648 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2650 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2653 /* XXX: optimize */
2654 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2656 uint8_t val;
2657 address_space_rw(as, addr, &val, 1, 0);
2658 return val;
2661 /* warning: addr must be aligned */
2662 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2663 enum device_endian endian)
2665 uint8_t *ptr;
2666 uint64_t val;
2667 MemoryRegion *mr;
2668 hwaddr l = 2;
2669 hwaddr addr1;
2671 mr = address_space_translate(as, addr, &addr1, &l,
2672 false);
2673 if (l < 2 || !memory_access_is_direct(mr, false)) {
2674 /* I/O case */
2675 io_mem_read(mr, addr1, &val, 2);
2676 #if defined(TARGET_WORDS_BIGENDIAN)
2677 if (endian == DEVICE_LITTLE_ENDIAN) {
2678 val = bswap16(val);
2680 #else
2681 if (endian == DEVICE_BIG_ENDIAN) {
2682 val = bswap16(val);
2684 #endif
2685 } else {
2686 /* RAM case */
2687 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2688 & TARGET_PAGE_MASK)
2689 + addr1);
2690 switch (endian) {
2691 case DEVICE_LITTLE_ENDIAN:
2692 val = lduw_le_p(ptr);
2693 break;
2694 case DEVICE_BIG_ENDIAN:
2695 val = lduw_be_p(ptr);
2696 break;
2697 default:
2698 val = lduw_p(ptr);
2699 break;
2702 return val;
2705 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2707 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2710 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2712 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2715 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2717 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2720 /* warning: addr must be aligned. The ram page is not masked as dirty
2721 and the code inside is not invalidated. It is useful if the dirty
2722 bits are used to track modified PTEs */
2723 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2725 uint8_t *ptr;
2726 MemoryRegion *mr;
2727 hwaddr l = 4;
2728 hwaddr addr1;
2730 mr = address_space_translate(as, addr, &addr1, &l,
2731 true);
2732 if (l < 4 || !memory_access_is_direct(mr, true)) {
2733 io_mem_write(mr, addr1, val, 4);
2734 } else {
2735 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2736 ptr = qemu_get_ram_ptr(addr1);
2737 stl_p(ptr, val);
2739 if (unlikely(in_migration)) {
2740 if (cpu_physical_memory_is_clean(addr1)) {
2741 /* invalidate code */
2742 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2743 /* set dirty bit */
2744 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2750 /* warning: addr must be aligned */
2751 static inline void stl_phys_internal(AddressSpace *as,
2752 hwaddr addr, uint32_t val,
2753 enum device_endian endian)
2755 uint8_t *ptr;
2756 MemoryRegion *mr;
2757 hwaddr l = 4;
2758 hwaddr addr1;
2760 mr = address_space_translate(as, addr, &addr1, &l,
2761 true);
2762 if (l < 4 || !memory_access_is_direct(mr, true)) {
2763 #if defined(TARGET_WORDS_BIGENDIAN)
2764 if (endian == DEVICE_LITTLE_ENDIAN) {
2765 val = bswap32(val);
2767 #else
2768 if (endian == DEVICE_BIG_ENDIAN) {
2769 val = bswap32(val);
2771 #endif
2772 io_mem_write(mr, addr1, val, 4);
2773 } else {
2774 /* RAM case */
2775 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2776 ptr = qemu_get_ram_ptr(addr1);
2777 switch (endian) {
2778 case DEVICE_LITTLE_ENDIAN:
2779 stl_le_p(ptr, val);
2780 break;
2781 case DEVICE_BIG_ENDIAN:
2782 stl_be_p(ptr, val);
2783 break;
2784 default:
2785 stl_p(ptr, val);
2786 break;
2788 invalidate_and_set_dirty(addr1, 4);
2792 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2794 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2797 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2799 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2802 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2804 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2807 /* XXX: optimize */
2808 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2810 uint8_t v = val;
2811 address_space_rw(as, addr, &v, 1, 1);
2814 /* warning: addr must be aligned */
2815 static inline void stw_phys_internal(AddressSpace *as,
2816 hwaddr addr, uint32_t val,
2817 enum device_endian endian)
2819 uint8_t *ptr;
2820 MemoryRegion *mr;
2821 hwaddr l = 2;
2822 hwaddr addr1;
2824 mr = address_space_translate(as, addr, &addr1, &l, true);
2825 if (l < 2 || !memory_access_is_direct(mr, true)) {
2826 #if defined(TARGET_WORDS_BIGENDIAN)
2827 if (endian == DEVICE_LITTLE_ENDIAN) {
2828 val = bswap16(val);
2830 #else
2831 if (endian == DEVICE_BIG_ENDIAN) {
2832 val = bswap16(val);
2834 #endif
2835 io_mem_write(mr, addr1, val, 2);
2836 } else {
2837 /* RAM case */
2838 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2839 ptr = qemu_get_ram_ptr(addr1);
2840 switch (endian) {
2841 case DEVICE_LITTLE_ENDIAN:
2842 stw_le_p(ptr, val);
2843 break;
2844 case DEVICE_BIG_ENDIAN:
2845 stw_be_p(ptr, val);
2846 break;
2847 default:
2848 stw_p(ptr, val);
2849 break;
2851 invalidate_and_set_dirty(addr1, 2);
2855 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2857 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2860 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2862 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2865 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2867 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2870 /* XXX: optimize */
2871 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2873 val = tswap64(val);
2874 address_space_rw(as, addr, (void *) &val, 8, 1);
2877 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2879 val = cpu_to_le64(val);
2880 address_space_rw(as, addr, (void *) &val, 8, 1);
2883 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2885 val = cpu_to_be64(val);
2886 address_space_rw(as, addr, (void *) &val, 8, 1);
2889 /* virtual memory access for debug (includes writing to ROM) */
2890 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2891 uint8_t *buf, int len, int is_write)
2893 int l;
2894 hwaddr phys_addr;
2895 target_ulong page;
2897 while (len > 0) {
2898 page = addr & TARGET_PAGE_MASK;
2899 phys_addr = cpu_get_phys_page_debug(cpu, page);
2900 /* if no physical page mapped, return an error */
2901 if (phys_addr == -1)
2902 return -1;
2903 l = (page + TARGET_PAGE_SIZE) - addr;
2904 if (l > len)
2905 l = len;
2906 phys_addr += (addr & ~TARGET_PAGE_MASK);
2907 if (is_write) {
2908 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2909 } else {
2910 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2912 len -= l;
2913 buf += l;
2914 addr += l;
2916 return 0;
2918 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out
 * if it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
2934 #ifndef CONFIG_USER_ONLY
2935 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2937 MemoryRegion*mr;
2938 hwaddr l = 1;
2940 mr = address_space_translate(&address_space_memory,
2941 phys_addr, &phys_addr, &l, false);
2943 return !(memory_region_is_ram(mr) ||
2944 memory_region_is_romd(mr));
2947 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2949 RAMBlock *block;
2951 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2952 func(block->host, block->offset, block->used_length, opaque);
2955 #endif