exec: RCUify AddressSpaceDispatch
[qemu/ar7.git] / exec.c
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
78 /* Only a portion of RAM (used_length) is actually used, and migrated.
79 * This used_length size can change across reboots.
80 */
81 #define RAM_RESIZEABLE (1 << 2)
83 #endif
85 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
86 /* current CPU in the current thread. It is only valid inside
87 cpu_exec() */
88 DEFINE_TLS(CPUState *, current_cpu);
89 /* 0 = Do not count executed instructions.
90 1 = Precise instruction counting.
91 2 = Adaptive rate instruction counting. */
92 int use_icount;
94 #if !defined(CONFIG_USER_ONLY)
96 typedef struct PhysPageEntry PhysPageEntry;
98 struct PhysPageEntry {
99 /* How many bits to skip to get to the next level (in units of L2_SIZE). 0 for a leaf. */
100 uint32_t skip : 6;
101 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
102 uint32_t ptr : 26;
103 };
105 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
107 /* Size of the L2 (and L3, etc) page tables. */
108 #define ADDR_SPACE_BITS 64
110 #define P_L2_BITS 9
111 #define P_L2_SIZE (1 << P_L2_BITS)
113 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
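/* Worked example: with ADDR_SPACE_BITS == 64, P_L2_BITS == 9 and a typical
 * TARGET_PAGE_BITS of 12 (TARGET_PAGE_BITS is target-dependent), the
 * expansion is
 *
 *   P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = (51 / 9) + 1 = 5 + 1 = 6
 *
 * i.e. six levels of 512-entry nodes.  Six levels index 6 * 9 = 54 bits,
 * which comfortably covers the 52-bit page frame number of a 64-bit
 * address space with 4 KiB pages.
 */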
115 typedef PhysPageEntry Node[P_L2_SIZE];
117 typedef struct PhysPageMap {
118 struct rcu_head rcu;
120 unsigned sections_nb;
121 unsigned sections_nb_alloc;
122 unsigned nodes_nb;
123 unsigned nodes_nb_alloc;
124 Node *nodes;
125 MemoryRegionSection *sections;
126 } PhysPageMap;
128 struct AddressSpaceDispatch {
129 struct rcu_head rcu;
131 /* This is a multi-level map on the physical address space.
132 * The bottom level has pointers to MemoryRegionSections.
133 */
134 PhysPageEntry phys_map;
135 PhysPageMap map;
136 AddressSpace *as;
137 };
139 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
140 typedef struct subpage_t {
141 MemoryRegion iomem;
142 AddressSpace *as;
143 hwaddr base;
144 uint16_t sub_section[TARGET_PAGE_SIZE];
145 } subpage_t;
147 #define PHYS_SECTION_UNASSIGNED 0
148 #define PHYS_SECTION_NOTDIRTY 1
149 #define PHYS_SECTION_ROM 2
150 #define PHYS_SECTION_WATCH 3
152 static void io_mem_init(void);
153 static void memory_map_init(void);
154 static void tcg_commit(MemoryListener *listener);
156 static MemoryRegion io_mem_watch;
157 #endif
159 #if !defined(CONFIG_USER_ONLY)
161 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
163 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
164 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
165 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
166 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
170 static uint32_t phys_map_node_alloc(PhysPageMap *map)
172 unsigned i;
173 uint32_t ret;
175 ret = map->nodes_nb++;
176 assert(ret != PHYS_MAP_NODE_NIL);
177 assert(ret != map->nodes_nb_alloc);
178 for (i = 0; i < P_L2_SIZE; ++i) {
179 map->nodes[ret][i].skip = 1;
180 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
182 return ret;
185 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
186 hwaddr *index, hwaddr *nb, uint16_t leaf,
187 int level)
189 PhysPageEntry *p;
190 int i;
191 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
193 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
194 lp->ptr = phys_map_node_alloc(map);
195 p = map->nodes[lp->ptr];
196 if (level == 0) {
197 for (i = 0; i < P_L2_SIZE; i++) {
198 p[i].skip = 0;
199 p[i].ptr = PHYS_SECTION_UNASSIGNED;
202 } else {
203 p = map->nodes[lp->ptr];
205 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
207 while (*nb && lp < &p[P_L2_SIZE]) {
208 if ((*index & (step - 1)) == 0 && *nb >= step) {
209 lp->skip = 0;
210 lp->ptr = leaf;
211 *index += step;
212 *nb -= step;
213 } else {
214 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
216 ++lp;
220 static void phys_page_set(AddressSpaceDispatch *d,
221 hwaddr index, hwaddr nb,
222 uint16_t leaf)
224 /* Wildly overreserve - it doesn't matter much. */
225 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
227 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
230 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
231 * and update our entry so we can skip it and go directly to the destination.
232 */
233 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
235 unsigned valid_ptr = P_L2_SIZE;
236 int valid = 0;
237 PhysPageEntry *p;
238 int i;
240 if (lp->ptr == PHYS_MAP_NODE_NIL) {
241 return;
244 p = nodes[lp->ptr];
245 for (i = 0; i < P_L2_SIZE; i++) {
246 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
247 continue;
250 valid_ptr = i;
251 valid++;
252 if (p[i].skip) {
253 phys_page_compact(&p[i], nodes, compacted);
257 /* We can only compress if there's only one child. */
258 if (valid != 1) {
259 return;
262 assert(valid_ptr < P_L2_SIZE);
264 /* Don't compress if it won't fit in the # of bits we have. */
265 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
266 return;
269 lp->ptr = p[valid_ptr].ptr;
270 if (!p[valid_ptr].skip) {
271 /* If our only child is a leaf, make this a leaf. */
272 /* By design, we should have made this node a leaf to begin with so we
273 * should never reach here.
274 * But since it's so simple to handle this, let's do it just in case we
275 * change this rule.
276 */
277 lp->skip = 0;
278 } else {
279 lp->skip += p[valid_ptr].skip;
283 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
285 DECLARE_BITMAP(compacted, nodes_nb);
287 if (d->phys_map.skip) {
288 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
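/* Illustration of the skip encoding: after compaction, a chain of
 * single-child intermediate nodes is folded into one entry whose skip
 * field is the sum of the folded levels.  For instance, an entry with
 * skip == 3 lets phys_page_find() below consume three levels of the
 * index in a single hop (i -= lp.skip) instead of walking three nodes
 * that each have only one populated slot.  (Values are hypothetical.)
 */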
292 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
293 Node *nodes, MemoryRegionSection *sections)
295 PhysPageEntry *p;
296 hwaddr index = addr >> TARGET_PAGE_BITS;
297 int i;
299 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
300 if (lp.ptr == PHYS_MAP_NODE_NIL) {
301 return &sections[PHYS_SECTION_UNASSIGNED];
303 p = nodes[lp.ptr];
304 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
307 if (sections[lp.ptr].size.hi ||
308 range_covers_byte(sections[lp.ptr].offset_within_address_space,
309 sections[lp.ptr].size.lo, addr)) {
310 return &sections[lp.ptr];
311 } else {
312 return &sections[PHYS_SECTION_UNASSIGNED];
316 bool memory_region_is_unassigned(MemoryRegion *mr)
318 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
319 && mr != &io_mem_watch;
322 /* Called from RCU critical section */
323 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
324 hwaddr addr,
325 bool resolve_subpage)
327 MemoryRegionSection *section;
328 subpage_t *subpage;
330 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
331 if (resolve_subpage && section->mr->subpage) {
332 subpage = container_of(section->mr, subpage_t, iomem);
333 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
335 return section;
338 /* Called from RCU critical section */
339 static MemoryRegionSection *
340 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
341 hwaddr *plen, bool resolve_subpage)
343 MemoryRegionSection *section;
344 Int128 diff;
346 section = address_space_lookup_region(d, addr, resolve_subpage);
347 /* Compute offset within MemoryRegionSection */
348 addr -= section->offset_within_address_space;
350 /* Compute offset within MemoryRegion */
351 *xlat = addr + section->offset_within_region;
353 diff = int128_sub(section->mr->size, int128_make64(addr));
354 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
355 return section;
358 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
360 if (memory_region_is_ram(mr)) {
361 return !(is_write && mr->readonly);
363 if (memory_region_is_romd(mr)) {
364 return !is_write;
367 return false;
370 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
371 hwaddr *xlat, hwaddr *plen,
372 bool is_write)
374 IOMMUTLBEntry iotlb;
375 MemoryRegionSection *section;
376 MemoryRegion *mr;
377 hwaddr len = *plen;
379 rcu_read_lock();
380 for (;;) {
381 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
382 section = address_space_translate_internal(d, addr, &addr, plen, true);
383 mr = section->mr;
385 if (!mr->iommu_ops) {
386 break;
389 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
390 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
391 | (addr & iotlb.addr_mask));
392 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
393 if (!(iotlb.perm & (1 << is_write))) {
394 mr = &io_mem_unassigned;
395 break;
398 as = iotlb.target_as;
401 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
402 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
403 len = MIN(page, len);
406 *plen = len;
407 *xlat = addr;
408 rcu_read_unlock();
409 return mr;
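/* A minimal sketch of a caller of the translation above (hypothetical
 * helper, shown for illustration only): translate once, honour the
 * clamped *plen, and only then touch the returned MemoryRegion.
 *
 *   static bool example_read_byte(AddressSpace *as, hwaddr addr, uint8_t *out)
 *   {
 *       hwaddr xlat, len = 1;
 *       MemoryRegion *mr = address_space_translate(as, addr, &xlat, &len, false);
 *       if (len < 1 || !memory_access_is_direct(mr, false)) {
 *           return false;    // an MMIO region would need io_mem_read() instead
 *       }
 *       *out = *(uint8_t *)qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + xlat);
 *       return true;
 *   }
 *
 * Note that address_space_translate() takes rcu_read_lock() internally,
 * so the returned MemoryRegion must not be cached across an RCU grace
 * period without taking a reference on it.
 */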
412 /* Called from RCU critical section */
413 MemoryRegionSection *
414 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
415 hwaddr *xlat, hwaddr *plen)
417 MemoryRegionSection *section;
418 section = address_space_translate_internal(cpu->memory_dispatch,
419 addr, xlat, plen, false);
421 assert(!section->mr->iommu_ops);
422 return section;
424 #endif
426 void cpu_exec_init_all(void)
428 #if !defined(CONFIG_USER_ONLY)
429 qemu_mutex_init(&ram_list.mutex);
430 memory_map_init();
431 io_mem_init();
432 #endif
435 #if !defined(CONFIG_USER_ONLY)
437 static int cpu_common_post_load(void *opaque, int version_id)
439 CPUState *cpu = opaque;
441 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
442 version_id is increased. */
443 cpu->interrupt_request &= ~0x01;
444 tlb_flush(cpu, 1);
446 return 0;
449 static int cpu_common_pre_load(void *opaque)
451 CPUState *cpu = opaque;
453 cpu->exception_index = -1;
455 return 0;
458 static bool cpu_common_exception_index_needed(void *opaque)
460 CPUState *cpu = opaque;
462 return tcg_enabled() && cpu->exception_index != -1;
465 static const VMStateDescription vmstate_cpu_common_exception_index = {
466 .name = "cpu_common/exception_index",
467 .version_id = 1,
468 .minimum_version_id = 1,
469 .fields = (VMStateField[]) {
470 VMSTATE_INT32(exception_index, CPUState),
471 VMSTATE_END_OF_LIST()
475 const VMStateDescription vmstate_cpu_common = {
476 .name = "cpu_common",
477 .version_id = 1,
478 .minimum_version_id = 1,
479 .pre_load = cpu_common_pre_load,
480 .post_load = cpu_common_post_load,
481 .fields = (VMStateField[]) {
482 VMSTATE_UINT32(halted, CPUState),
483 VMSTATE_UINT32(interrupt_request, CPUState),
484 VMSTATE_END_OF_LIST()
486 .subsections = (VMStateSubsection[]) {
488 .vmsd = &vmstate_cpu_common_exception_index,
489 .needed = cpu_common_exception_index_needed,
490 } , {
491 /* empty */
496 #endif
498 CPUState *qemu_get_cpu(int index)
500 CPUState *cpu;
502 CPU_FOREACH(cpu) {
503 if (cpu->cpu_index == index) {
504 return cpu;
508 return NULL;
511 #if !defined(CONFIG_USER_ONLY)
512 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
514 /* We only support one address space per cpu at the moment. */
515 assert(cpu->as == as);
517 if (cpu->tcg_as_listener) {
518 memory_listener_unregister(cpu->tcg_as_listener);
519 } else {
520 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
522 cpu->tcg_as_listener->commit = tcg_commit;
523 memory_listener_register(cpu->tcg_as_listener, as);
525 #endif
527 void cpu_exec_init(CPUArchState *env)
529 CPUState *cpu = ENV_GET_CPU(env);
530 CPUClass *cc = CPU_GET_CLASS(cpu);
531 CPUState *some_cpu;
532 int cpu_index;
534 #if defined(CONFIG_USER_ONLY)
535 cpu_list_lock();
536 #endif
537 cpu_index = 0;
538 CPU_FOREACH(some_cpu) {
539 cpu_index++;
541 cpu->cpu_index = cpu_index;
542 cpu->numa_node = 0;
543 QTAILQ_INIT(&cpu->breakpoints);
544 QTAILQ_INIT(&cpu->watchpoints);
545 #ifndef CONFIG_USER_ONLY
546 cpu->as = &address_space_memory;
547 cpu->thread_id = qemu_get_thread_id();
548 #endif
549 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
550 #if defined(CONFIG_USER_ONLY)
551 cpu_list_unlock();
552 #endif
553 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
554 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
556 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
557 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
558 cpu_save, cpu_load, env);
559 assert(cc->vmsd == NULL);
560 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
561 #endif
562 if (cc->vmsd != NULL) {
563 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
567 #if defined(CONFIG_USER_ONLY)
568 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
570 tb_invalidate_phys_page_range(pc, pc + 1, 0);
572 #else
573 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
575 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
576 if (phys != -1) {
577 tb_invalidate_phys_addr(cpu->as,
578 phys | (pc & ~TARGET_PAGE_MASK));
581 #endif
583 #if defined(CONFIG_USER_ONLY)
584 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
589 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
590 int flags)
592 return -ENOSYS;
595 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
599 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
600 int flags, CPUWatchpoint **watchpoint)
602 return -ENOSYS;
604 #else
605 /* Add a watchpoint. */
606 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
607 int flags, CPUWatchpoint **watchpoint)
609 CPUWatchpoint *wp;
611 /* forbid ranges which are empty or run off the end of the address space */
612 if (len == 0 || (addr + len - 1) < addr) {
613 error_report("tried to set invalid watchpoint at %"
614 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
615 return -EINVAL;
617 wp = g_malloc(sizeof(*wp));
619 wp->vaddr = addr;
620 wp->len = len;
621 wp->flags = flags;
623 /* keep all GDB-injected watchpoints in front */
624 if (flags & BP_GDB) {
625 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
626 } else {
627 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
630 tlb_flush_page(cpu, addr);
632 if (watchpoint)
633 *watchpoint = wp;
634 return 0;
637 /* Remove a specific watchpoint. */
638 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
639 int flags)
641 CPUWatchpoint *wp;
643 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
644 if (addr == wp->vaddr && len == wp->len
645 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
646 cpu_watchpoint_remove_by_ref(cpu, wp);
647 return 0;
650 return -ENOENT;
653 /* Remove a specific watchpoint by reference. */
654 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
656 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
658 tlb_flush_page(cpu, watchpoint->vaddr);
660 g_free(watchpoint);
663 /* Remove all matching watchpoints. */
664 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
666 CPUWatchpoint *wp, *next;
668 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
669 if (wp->flags & mask) {
670 cpu_watchpoint_remove_by_ref(cpu, wp);
675 /* Return true if this watchpoint address matches the specified
676 * access (ie the address range covered by the watchpoint overlaps
677 * partially or completely with the address range covered by the
678 * access).
679 */
680 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
681 vaddr addr,
682 vaddr len)
684 /* We know the lengths are non-zero, but a little caution is
685 * required to avoid errors in the case where the range ends
686 * exactly at the top of the address space and so addr + len
687 * wraps round to zero.
688 */
689 vaddr wpend = wp->vaddr + wp->len - 1;
690 vaddr addrend = addr + len - 1;
692 return !(addr > wpend || wp->vaddr > addrend);
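/* Example of why the "- 1" form matters (64-bit vaddr assumed): a
 * watchpoint at 0xfffffffffffffff0 with len 0x10 gives
 * wpend == 0xffffffffffffffff, and an 8-byte access at
 * 0xfffffffffffffff8 gives addrend == 0xffffffffffffffff, so
 * !(addr > wpend || wp->vaddr > addrend) correctly reports an overlap.
 * A naive "addr < wp->vaddr + wp->len" test would have wrapped
 * wp->vaddr + wp->len around to zero and missed the hit.
 */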
695 #endif
697 /* Add a breakpoint. */
698 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
699 CPUBreakpoint **breakpoint)
701 CPUBreakpoint *bp;
703 bp = g_malloc(sizeof(*bp));
705 bp->pc = pc;
706 bp->flags = flags;
708 /* keep all GDB-injected breakpoints in front */
709 if (flags & BP_GDB) {
710 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
711 } else {
712 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
715 breakpoint_invalidate(cpu, pc);
717 if (breakpoint) {
718 *breakpoint = bp;
720 return 0;
723 /* Remove a specific breakpoint. */
724 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
726 CPUBreakpoint *bp;
728 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
729 if (bp->pc == pc && bp->flags == flags) {
730 cpu_breakpoint_remove_by_ref(cpu, bp);
731 return 0;
734 return -ENOENT;
737 /* Remove a specific breakpoint by reference. */
738 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
740 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
742 breakpoint_invalidate(cpu, breakpoint->pc);
744 g_free(breakpoint);
747 /* Remove all matching breakpoints. */
748 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
750 CPUBreakpoint *bp, *next;
752 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
753 if (bp->flags & mask) {
754 cpu_breakpoint_remove_by_ref(cpu, bp);
759 /* enable or disable single step mode. EXCP_DEBUG is returned by the
760 CPU loop after each instruction */
761 void cpu_single_step(CPUState *cpu, int enabled)
763 if (cpu->singlestep_enabled != enabled) {
764 cpu->singlestep_enabled = enabled;
765 if (kvm_enabled()) {
766 kvm_update_guest_debug(cpu, 0);
767 } else {
768 /* must flush all the translated code to avoid inconsistencies */
769 /* XXX: only flush what is necessary */
770 CPUArchState *env = cpu->env_ptr;
771 tb_flush(env);
776 void cpu_abort(CPUState *cpu, const char *fmt, ...)
778 va_list ap;
779 va_list ap2;
781 va_start(ap, fmt);
782 va_copy(ap2, ap);
783 fprintf(stderr, "qemu: fatal: ");
784 vfprintf(stderr, fmt, ap);
785 fprintf(stderr, "\n");
786 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
787 if (qemu_log_enabled()) {
788 qemu_log("qemu: fatal: ");
789 qemu_log_vprintf(fmt, ap2);
790 qemu_log("\n");
791 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
792 qemu_log_flush();
793 qemu_log_close();
795 va_end(ap2);
796 va_end(ap);
797 #if defined(CONFIG_USER_ONLY)
799 struct sigaction act;
800 sigfillset(&act.sa_mask);
801 act.sa_handler = SIG_DFL;
802 sigaction(SIGABRT, &act, NULL);
804 #endif
805 abort();
808 #if !defined(CONFIG_USER_ONLY)
809 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
811 RAMBlock *block;
813 /* The list is protected by the iothread lock here. */
814 block = ram_list.mru_block;
815 if (block && addr - block->offset < block->max_length) {
816 goto found;
818 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
819 if (addr - block->offset < block->max_length) {
820 goto found;
824 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
825 abort();
827 found:
828 ram_list.mru_block = block;
829 return block;
832 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
834 ram_addr_t start1;
835 RAMBlock *block;
836 ram_addr_t end;
838 end = TARGET_PAGE_ALIGN(start + length);
839 start &= TARGET_PAGE_MASK;
841 block = qemu_get_ram_block(start);
842 assert(block == qemu_get_ram_block(end - 1));
843 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
844 cpu_tlb_reset_dirty_all(start1, length);
847 /* Note: start and end must be within the same ram block. */
848 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
849 unsigned client)
851 if (length == 0)
852 return;
853 cpu_physical_memory_clear_dirty_range_type(start, length, client);
855 if (tcg_enabled()) {
856 tlb_reset_dirty_range_all(start, length);
860 static void cpu_physical_memory_set_dirty_tracking(bool enable)
862 in_migration = enable;
865 /* Called from RCU critical section */
866 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
867 MemoryRegionSection *section,
868 target_ulong vaddr,
869 hwaddr paddr, hwaddr xlat,
870 int prot,
871 target_ulong *address)
873 hwaddr iotlb;
874 CPUWatchpoint *wp;
876 if (memory_region_is_ram(section->mr)) {
877 /* Normal RAM. */
878 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
879 + xlat;
880 if (!section->readonly) {
881 iotlb |= PHYS_SECTION_NOTDIRTY;
882 } else {
883 iotlb |= PHYS_SECTION_ROM;
885 } else {
886 iotlb = section - section->address_space->dispatch->map.sections;
887 iotlb += xlat;
890 /* Make accesses to pages with watchpoints go via the
891 watchpoint trap routines. */
892 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
893 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
894 /* Avoid trapping reads of pages with a write breakpoint. */
895 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
896 iotlb = PHYS_SECTION_WATCH + paddr;
897 *address |= TLB_MMIO;
898 break;
903 return iotlb;
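/* Sketch of the encoding produced above, assuming TARGET_PAGE_BITS == 12:
 * for RAM the page-aligned ram_addr occupies the high bits and the low
 * bits carry a small section number, e.g.
 *
 *   ram_addr 0x12345000, writable page  ->  iotlb = 0x12345000 | PHYS_SECTION_NOTDIRTY
 *
 * while for MMIO the value is a section index plus the offset within the
 * page; iotlb_to_region() later recovers the section with
 * "index & ~TARGET_PAGE_MASK".  phys_section_add() asserts that
 * sections_nb < TARGET_PAGE_SIZE precisely so the two encodings cannot
 * collide.
 */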
905 #endif /* defined(CONFIG_USER_ONLY) */
907 #if !defined(CONFIG_USER_ONLY)
909 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
910 uint16_t section);
911 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
913 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
914 qemu_anon_ram_alloc;
916 /*
917 * Set a custom physical guest memory allocator.
918 * Accelerators with unusual needs may need this. Hopefully, we can
919 * get rid of it eventually.
920 */
921 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
923 phys_mem_alloc = alloc;
926 static uint16_t phys_section_add(PhysPageMap *map,
927 MemoryRegionSection *section)
929 /* The physical section number is ORed with a page-aligned
930 * pointer to produce the iotlb entries. Thus it should
931 * never overflow into the page-aligned value.
932 */
933 assert(map->sections_nb < TARGET_PAGE_SIZE);
935 if (map->sections_nb == map->sections_nb_alloc) {
936 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
937 map->sections = g_renew(MemoryRegionSection, map->sections,
938 map->sections_nb_alloc);
940 map->sections[map->sections_nb] = *section;
941 memory_region_ref(section->mr);
942 return map->sections_nb++;
945 static void phys_section_destroy(MemoryRegion *mr)
947 memory_region_unref(mr);
949 if (mr->subpage) {
950 subpage_t *subpage = container_of(mr, subpage_t, iomem);
951 object_unref(OBJECT(&subpage->iomem));
952 g_free(subpage);
956 static void phys_sections_free(PhysPageMap *map)
958 while (map->sections_nb > 0) {
959 MemoryRegionSection *section = &map->sections[--map->sections_nb];
960 phys_section_destroy(section->mr);
962 g_free(map->sections);
963 g_free(map->nodes);
966 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
968 subpage_t *subpage;
969 hwaddr base = section->offset_within_address_space
970 & TARGET_PAGE_MASK;
971 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
972 d->map.nodes, d->map.sections);
973 MemoryRegionSection subsection = {
974 .offset_within_address_space = base,
975 .size = int128_make64(TARGET_PAGE_SIZE),
977 hwaddr start, end;
979 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
981 if (!(existing->mr->subpage)) {
982 subpage = subpage_init(d->as, base);
983 subsection.address_space = d->as;
984 subsection.mr = &subpage->iomem;
985 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
986 phys_section_add(&d->map, &subsection));
987 } else {
988 subpage = container_of(existing->mr, subpage_t, iomem);
990 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
991 end = start + int128_get64(section->size) - 1;
992 subpage_register(subpage, start, end,
993 phys_section_add(&d->map, section));
997 static void register_multipage(AddressSpaceDispatch *d,
998 MemoryRegionSection *section)
1000 hwaddr start_addr = section->offset_within_address_space;
1001 uint16_t section_index = phys_section_add(&d->map, section);
1002 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1003 TARGET_PAGE_BITS));
1005 assert(num_pages);
1006 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1009 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1011 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1012 AddressSpaceDispatch *d = as->next_dispatch;
1013 MemoryRegionSection now = *section, remain = *section;
1014 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1016 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1017 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1018 - now.offset_within_address_space;
1020 now.size = int128_min(int128_make64(left), now.size);
1021 register_subpage(d, &now);
1022 } else {
1023 now.size = int128_zero();
1025 while (int128_ne(remain.size, now.size)) {
1026 remain.size = int128_sub(remain.size, now.size);
1027 remain.offset_within_address_space += int128_get64(now.size);
1028 remain.offset_within_region += int128_get64(now.size);
1029 now = remain;
1030 if (int128_lt(remain.size, page_size)) {
1031 register_subpage(d, &now);
1032 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1033 now.size = page_size;
1034 register_subpage(d, &now);
1035 } else {
1036 now.size = int128_and(now.size, int128_neg(page_size));
1037 register_multipage(d, &now);
1042 void qemu_flush_coalesced_mmio_buffer(void)
1044 if (kvm_enabled())
1045 kvm_flush_coalesced_mmio_buffer();
1048 void qemu_mutex_lock_ramlist(void)
1050 qemu_mutex_lock(&ram_list.mutex);
1053 void qemu_mutex_unlock_ramlist(void)
1055 qemu_mutex_unlock(&ram_list.mutex);
1058 #ifdef __linux__
1060 #include <sys/vfs.h>
1062 #define HUGETLBFS_MAGIC 0x958458f6
1064 static long gethugepagesize(const char *path, Error **errp)
1066 struct statfs fs;
1067 int ret;
1069 do {
1070 ret = statfs(path, &fs);
1071 } while (ret != 0 && errno == EINTR);
1073 if (ret != 0) {
1074 error_setg_errno(errp, errno, "failed to get page size of file %s",
1075 path);
1076 return 0;
1079 if (fs.f_type != HUGETLBFS_MAGIC)
1080 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1082 return fs.f_bsize;
1085 static void *file_ram_alloc(RAMBlock *block,
1086 ram_addr_t memory,
1087 const char *path,
1088 Error **errp)
1090 char *filename;
1091 char *sanitized_name;
1092 char *c;
1093 void *area = NULL;
1094 int fd;
1095 uint64_t hpagesize;
1096 Error *local_err = NULL;
1098 hpagesize = gethugepagesize(path, &local_err);
1099 if (local_err) {
1100 error_propagate(errp, local_err);
1101 goto error;
1103 block->mr->align = hpagesize;
1105 if (memory < hpagesize) {
1106 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1107 "or larger than huge page size 0x%" PRIx64,
1108 memory, hpagesize);
1109 goto error;
1112 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1113 error_setg(errp,
1114 "host lacks kvm mmu notifiers, -mem-path unsupported");
1115 goto error;
1118 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1119 sanitized_name = g_strdup(memory_region_name(block->mr));
1120 for (c = sanitized_name; *c != '\0'; c++) {
1121 if (*c == '/')
1122 *c = '_';
1125 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1126 sanitized_name);
1127 g_free(sanitized_name);
1129 fd = mkstemp(filename);
1130 if (fd < 0) {
1131 error_setg_errno(errp, errno,
1132 "unable to create backing store for hugepages");
1133 g_free(filename);
1134 goto error;
1136 unlink(filename);
1137 g_free(filename);
1139 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1141 /*
1142 * ftruncate is not supported by hugetlbfs in older
1143 * hosts, so don't bother bailing out on errors.
1144 * If anything goes wrong with it under other filesystems,
1145 * mmap will fail.
1146 */
1147 if (ftruncate(fd, memory)) {
1148 perror("ftruncate");
1151 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1152 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1153 fd, 0);
1154 if (area == MAP_FAILED) {
1155 error_setg_errno(errp, errno,
1156 "unable to map backing store for hugepages");
1157 close(fd);
1158 goto error;
1161 if (mem_prealloc) {
1162 os_mem_prealloc(fd, area, memory);
1165 block->fd = fd;
1166 return area;
1168 error:
1169 if (mem_prealloc) {
1170 error_report("%s\n", error_get_pretty(*errp));
1171 exit(1);
1173 return NULL;
1175 #endif
1177 static ram_addr_t find_ram_offset(ram_addr_t size)
1179 RAMBlock *block, *next_block;
1180 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1182 assert(size != 0); /* it would hand out same offset multiple times */
1184 if (QTAILQ_EMPTY(&ram_list.blocks))
1185 return 0;
1187 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1188 ram_addr_t end, next = RAM_ADDR_MAX;
1190 end = block->offset + block->max_length;
1192 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1193 if (next_block->offset >= end) {
1194 next = MIN(next, next_block->offset);
1197 if (next - end >= size && next - end < mingap) {
1198 offset = end;
1199 mingap = next - end;
1203 if (offset == RAM_ADDR_MAX) {
1204 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1205 (uint64_t)size);
1206 abort();
1209 return offset;
1212 ram_addr_t last_ram_offset(void)
1214 RAMBlock *block;
1215 ram_addr_t last = 0;
1217 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1218 last = MAX(last, block->offset + block->max_length);
1220 return last;
1223 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1225 int ret;
1227 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1228 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1229 "dump-guest-core", true)) {
1230 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1231 if (ret) {
1232 perror("qemu_madvise");
1233 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1234 "but dump_guest_core=off specified\n");
1239 static RAMBlock *find_ram_block(ram_addr_t addr)
1241 RAMBlock *block;
1243 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1244 if (block->offset == addr) {
1245 return block;
1249 return NULL;
1252 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1254 RAMBlock *new_block = find_ram_block(addr);
1255 RAMBlock *block;
1257 assert(new_block);
1258 assert(!new_block->idstr[0]);
1260 if (dev) {
1261 char *id = qdev_get_dev_path(dev);
1262 if (id) {
1263 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1264 g_free(id);
1267 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1269 /* This assumes the iothread lock is taken here too. */
1270 qemu_mutex_lock_ramlist();
1271 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1272 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1273 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1274 new_block->idstr);
1275 abort();
1278 qemu_mutex_unlock_ramlist();
1281 void qemu_ram_unset_idstr(ram_addr_t addr)
1283 RAMBlock *block = find_ram_block(addr);
1285 if (block) {
1286 memset(block->idstr, 0, sizeof(block->idstr));
1290 static int memory_try_enable_merging(void *addr, size_t len)
1292 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1293 /* disabled by the user */
1294 return 0;
1297 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1300 /* Only legal before the guest might have detected the memory size: e.g. on
1301 * incoming migration, or right after reset.
1302 *
1303 * As the memory core doesn't know how the memory is accessed, it is up to the
1304 * resize callback to update device state and/or add assertions to detect
1305 * misuse, if necessary.
1306 */
1307 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1309 RAMBlock *block = find_ram_block(base);
1311 assert(block);
1313 if (block->used_length == newsize) {
1314 return 0;
1317 if (!(block->flags & RAM_RESIZEABLE)) {
1318 error_setg_errno(errp, EINVAL,
1319 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1320 " in != 0x" RAM_ADDR_FMT, block->idstr,
1321 newsize, block->used_length);
1322 return -EINVAL;
1325 if (block->max_length < newsize) {
1326 error_setg_errno(errp, EINVAL,
1327 "Length too large: %s: 0x" RAM_ADDR_FMT
1328 " > 0x" RAM_ADDR_FMT, block->idstr,
1329 newsize, block->max_length);
1330 return -EINVAL;
1333 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1334 block->used_length = newsize;
1335 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1336 memory_region_set_size(block->mr, newsize);
1337 if (block->resized) {
1338 block->resized(block->idstr, newsize, block->host);
1340 return 0;
1343 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1345 RAMBlock *block;
1346 ram_addr_t old_ram_size, new_ram_size;
1348 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1350 /* This assumes the iothread lock is taken here too. */
1351 qemu_mutex_lock_ramlist();
1352 new_block->offset = find_ram_offset(new_block->max_length);
1354 if (!new_block->host) {
1355 if (xen_enabled()) {
1356 xen_ram_alloc(new_block->offset, new_block->max_length,
1357 new_block->mr);
1358 } else {
1359 new_block->host = phys_mem_alloc(new_block->max_length,
1360 &new_block->mr->align);
1361 if (!new_block->host) {
1362 error_setg_errno(errp, errno,
1363 "cannot set up guest memory '%s'",
1364 memory_region_name(new_block->mr));
1365 qemu_mutex_unlock_ramlist();
1366 return -1;
1368 memory_try_enable_merging(new_block->host, new_block->max_length);
1372 /* Keep the list sorted from biggest to smallest block. */
1373 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1374 if (block->max_length < new_block->max_length) {
1375 break;
1378 if (block) {
1379 QTAILQ_INSERT_BEFORE(block, new_block, next);
1380 } else {
1381 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1383 ram_list.mru_block = NULL;
1385 ram_list.version++;
1386 qemu_mutex_unlock_ramlist();
1388 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1390 if (new_ram_size > old_ram_size) {
1391 int i;
1392 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1393 ram_list.dirty_memory[i] =
1394 bitmap_zero_extend(ram_list.dirty_memory[i],
1395 old_ram_size, new_ram_size);
1398 cpu_physical_memory_set_dirty_range(new_block->offset,
1399 new_block->used_length);
1401 if (new_block->host) {
1402 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1403 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1404 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1405 if (kvm_enabled()) {
1406 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1410 return new_block->offset;
1413 #ifdef __linux__
1414 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1415 bool share, const char *mem_path,
1416 Error **errp)
1418 RAMBlock *new_block;
1419 ram_addr_t addr;
1420 Error *local_err = NULL;
1422 if (xen_enabled()) {
1423 error_setg(errp, "-mem-path not supported with Xen");
1424 return -1;
1427 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1428 /*
1429 * file_ram_alloc() needs to allocate just like
1430 * phys_mem_alloc, but we haven't bothered to provide
1431 * a hook there.
1432 */
1433 error_setg(errp,
1434 "-mem-path not supported with this accelerator");
1435 return -1;
1438 size = TARGET_PAGE_ALIGN(size);
1439 new_block = g_malloc0(sizeof(*new_block));
1440 new_block->mr = mr;
1441 new_block->used_length = size;
1442 new_block->max_length = size;
1443 new_block->flags = share ? RAM_SHARED : 0;
1444 new_block->host = file_ram_alloc(new_block, size,
1445 mem_path, errp);
1446 if (!new_block->host) {
1447 g_free(new_block);
1448 return -1;
1451 addr = ram_block_add(new_block, &local_err);
1452 if (local_err) {
1453 g_free(new_block);
1454 error_propagate(errp, local_err);
1455 return -1;
1457 return addr;
1459 #endif
1461 static
1462 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1463 void (*resized)(const char*,
1464 uint64_t length,
1465 void *host),
1466 void *host, bool resizeable,
1467 MemoryRegion *mr, Error **errp)
1469 RAMBlock *new_block;
1470 ram_addr_t addr;
1471 Error *local_err = NULL;
1473 size = TARGET_PAGE_ALIGN(size);
1474 max_size = TARGET_PAGE_ALIGN(max_size);
1475 new_block = g_malloc0(sizeof(*new_block));
1476 new_block->mr = mr;
1477 new_block->resized = resized;
1478 new_block->used_length = size;
1479 new_block->max_length = max_size;
1480 assert(max_size >= size);
1481 new_block->fd = -1;
1482 new_block->host = host;
1483 if (host) {
1484 new_block->flags |= RAM_PREALLOC;
1486 if (resizeable) {
1487 new_block->flags |= RAM_RESIZEABLE;
1489 addr = ram_block_add(new_block, &local_err);
1490 if (local_err) {
1491 g_free(new_block);
1492 error_propagate(errp, local_err);
1493 return -1;
1495 return addr;
1498 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1499 MemoryRegion *mr, Error **errp)
1501 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1504 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1506 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1509 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1510 void (*resized)(const char*,
1511 uint64_t length,
1512 void *host),
1513 MemoryRegion *mr, Error **errp)
1515 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
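/* A minimal usage sketch for the resizeable variant (hypothetical device
 * code; names and sizes are only for illustration).  The callback must
 * match the signature taken above, and used_length may later be grown up
 * to maxsz with qemu_ram_resize():
 *
 *   static void example_ram_resized(const char *id, uint64_t len, void *host)
 *   {
 *       // refresh whatever device state mirrors the RAM size
 *   }
 *
 *   Error *local_err = NULL;
 *   ram_addr_t base = qemu_ram_alloc_resizeable(16 * 1024 * 1024,
 *                                               64 * 1024 * 1024,
 *                                               example_ram_resized,
 *                                               mr, &local_err);
 *   ...
 *   qemu_ram_resize(base, 32 * 1024 * 1024, &local_err);
 */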
1518 void qemu_ram_free_from_ptr(ram_addr_t addr)
1520 RAMBlock *block;
1522 /* This assumes the iothread lock is taken here too. */
1523 qemu_mutex_lock_ramlist();
1524 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1525 if (addr == block->offset) {
1526 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1527 ram_list.mru_block = NULL;
1528 ram_list.version++;
1529 g_free(block);
1530 break;
1533 qemu_mutex_unlock_ramlist();
1536 void qemu_ram_free(ram_addr_t addr)
1538 RAMBlock *block;
1540 /* This assumes the iothread lock is taken here too. */
1541 qemu_mutex_lock_ramlist();
1542 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1543 if (addr == block->offset) {
1544 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1545 ram_list.mru_block = NULL;
1546 ram_list.version++;
1547 if (block->flags & RAM_PREALLOC) {
1549 } else if (xen_enabled()) {
1550 xen_invalidate_map_cache_entry(block->host);
1551 #ifndef _WIN32
1552 } else if (block->fd >= 0) {
1553 munmap(block->host, block->max_length);
1554 close(block->fd);
1555 #endif
1556 } else {
1557 qemu_anon_ram_free(block->host, block->max_length);
1559 g_free(block);
1560 break;
1563 qemu_mutex_unlock_ramlist();
1567 #ifndef _WIN32
1568 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1570 RAMBlock *block;
1571 ram_addr_t offset;
1572 int flags;
1573 void *area, *vaddr;
1575 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1576 offset = addr - block->offset;
1577 if (offset < block->max_length) {
1578 vaddr = ramblock_ptr(block, offset);
1579 if (block->flags & RAM_PREALLOC) {
1581 } else if (xen_enabled()) {
1582 abort();
1583 } else {
1584 flags = MAP_FIXED;
1585 munmap(vaddr, length);
1586 if (block->fd >= 0) {
1587 flags |= (block->flags & RAM_SHARED ?
1588 MAP_SHARED : MAP_PRIVATE);
1589 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1590 flags, block->fd, offset);
1591 } else {
1592 /*
1593 * Remap needs to match alloc. Accelerators that
1594 * set phys_mem_alloc never remap. If they did,
1595 * we'd need a remap hook here.
1596 */
1597 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1599 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1600 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1601 flags, -1, 0);
1603 if (area != vaddr) {
1604 fprintf(stderr, "Could not remap addr: "
1605 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1606 length, addr);
1607 exit(1);
1609 memory_try_enable_merging(vaddr, length);
1610 qemu_ram_setup_dump(vaddr, length);
1612 return;
1616 #endif /* !_WIN32 */
1618 int qemu_get_ram_fd(ram_addr_t addr)
1620 RAMBlock *block = qemu_get_ram_block(addr);
1622 return block->fd;
1625 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1627 RAMBlock *block = qemu_get_ram_block(addr);
1629 return ramblock_ptr(block, 0);
1632 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1633 With the exception of the softmmu code in this file, this should
1634 only be used for local memory (e.g. video ram) that the device owns,
1635 and knows it isn't going to access beyond the end of the block.
1637 It should not be used for general purpose DMA.
1638 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1639 */
1640 void *qemu_get_ram_ptr(ram_addr_t addr)
1642 RAMBlock *block = qemu_get_ram_block(addr);
1644 if (xen_enabled()) {
1645 /* We need to check if the requested address is in the RAM
1646 * because we don't want to map the entire memory in QEMU.
1647 * In that case just map until the end of the page.
1648 */
1649 if (block->offset == 0) {
1650 return xen_map_cache(addr, 0, 0);
1651 } else if (block->host == NULL) {
1652 block->host =
1653 xen_map_cache(block->offset, block->max_length, 1);
1656 return ramblock_ptr(block, addr - block->offset);
1659 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1660 * but takes a size argument */
1661 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1663 if (*size == 0) {
1664 return NULL;
1666 if (xen_enabled()) {
1667 return xen_map_cache(addr, *size, 1);
1668 } else {
1669 RAMBlock *block;
1671 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1672 if (addr - block->offset < block->max_length) {
1673 if (addr - block->offset + *size > block->max_length)
1674 *size = block->max_length - addr + block->offset;
1675 return ramblock_ptr(block, addr - block->offset);
1679 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1680 abort();
1684 /* Some of the softmmu routines need to translate from a host pointer
1685 (typically a TLB entry) back to a ram offset. */
1686 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1688 RAMBlock *block;
1689 uint8_t *host = ptr;
1691 if (xen_enabled()) {
1692 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1693 return qemu_get_ram_block(*ram_addr)->mr;
1696 block = ram_list.mru_block;
1697 if (block && block->host && host - block->host < block->max_length) {
1698 goto found;
1701 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1702 /* This case happens when the block is not mapped. */
1703 if (block->host == NULL) {
1704 continue;
1706 if (host - block->host < block->max_length) {
1707 goto found;
1711 return NULL;
1713 found:
1714 *ram_addr = block->offset + (host - block->host);
1715 return block->mr;
1718 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1719 uint64_t val, unsigned size)
1721 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1722 tb_invalidate_phys_page_fast(ram_addr, size);
1724 switch (size) {
1725 case 1:
1726 stb_p(qemu_get_ram_ptr(ram_addr), val);
1727 break;
1728 case 2:
1729 stw_p(qemu_get_ram_ptr(ram_addr), val);
1730 break;
1731 case 4:
1732 stl_p(qemu_get_ram_ptr(ram_addr), val);
1733 break;
1734 default:
1735 abort();
1737 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1738 /* we remove the notdirty callback only if the code has been
1739 flushed */
1740 if (!cpu_physical_memory_is_clean(ram_addr)) {
1741 CPUArchState *env = current_cpu->env_ptr;
1742 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1746 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1747 unsigned size, bool is_write)
1749 return is_write;
1752 static const MemoryRegionOps notdirty_mem_ops = {
1753 .write = notdirty_mem_write,
1754 .valid.accepts = notdirty_mem_accepts,
1755 .endianness = DEVICE_NATIVE_ENDIAN,
1758 /* Generate a debug exception if a watchpoint has been hit. */
1759 static void check_watchpoint(int offset, int len, int flags)
1761 CPUState *cpu = current_cpu;
1762 CPUArchState *env = cpu->env_ptr;
1763 target_ulong pc, cs_base;
1764 target_ulong vaddr;
1765 CPUWatchpoint *wp;
1766 int cpu_flags;
1768 if (cpu->watchpoint_hit) {
1769 /* We re-entered the check after replacing the TB. Now raise
1770 * the debug interrupt so that it will trigger after the
1771 * current instruction. */
1772 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1773 return;
1775 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1776 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1777 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1778 && (wp->flags & flags)) {
1779 if (flags == BP_MEM_READ) {
1780 wp->flags |= BP_WATCHPOINT_HIT_READ;
1781 } else {
1782 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1784 wp->hitaddr = vaddr;
1785 if (!cpu->watchpoint_hit) {
1786 cpu->watchpoint_hit = wp;
1787 tb_check_watchpoint(cpu);
1788 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1789 cpu->exception_index = EXCP_DEBUG;
1790 cpu_loop_exit(cpu);
1791 } else {
1792 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1793 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1794 cpu_resume_from_signal(cpu, NULL);
1797 } else {
1798 wp->flags &= ~BP_WATCHPOINT_HIT;
1803 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1804 so these check for a hit then pass through to the normal out-of-line
1805 phys routines. */
1806 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1807 unsigned size)
1809 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1810 switch (size) {
1811 case 1: return ldub_phys(&address_space_memory, addr);
1812 case 2: return lduw_phys(&address_space_memory, addr);
1813 case 4: return ldl_phys(&address_space_memory, addr);
1814 default: abort();
1818 static void watch_mem_write(void *opaque, hwaddr addr,
1819 uint64_t val, unsigned size)
1821 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1822 switch (size) {
1823 case 1:
1824 stb_phys(&address_space_memory, addr, val);
1825 break;
1826 case 2:
1827 stw_phys(&address_space_memory, addr, val);
1828 break;
1829 case 4:
1830 stl_phys(&address_space_memory, addr, val);
1831 break;
1832 default: abort();
1836 static const MemoryRegionOps watch_mem_ops = {
1837 .read = watch_mem_read,
1838 .write = watch_mem_write,
1839 .endianness = DEVICE_NATIVE_ENDIAN,
1842 static uint64_t subpage_read(void *opaque, hwaddr addr,
1843 unsigned len)
1845 subpage_t *subpage = opaque;
1846 uint8_t buf[8];
1848 #if defined(DEBUG_SUBPAGE)
1849 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1850 subpage, len, addr);
1851 #endif
1852 address_space_read(subpage->as, addr + subpage->base, buf, len);
1853 switch (len) {
1854 case 1:
1855 return ldub_p(buf);
1856 case 2:
1857 return lduw_p(buf);
1858 case 4:
1859 return ldl_p(buf);
1860 case 8:
1861 return ldq_p(buf);
1862 default:
1863 abort();
1867 static void subpage_write(void *opaque, hwaddr addr,
1868 uint64_t value, unsigned len)
1870 subpage_t *subpage = opaque;
1871 uint8_t buf[8];
1873 #if defined(DEBUG_SUBPAGE)
1874 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1875 " value %"PRIx64"\n",
1876 __func__, subpage, len, addr, value);
1877 #endif
1878 switch (len) {
1879 case 1:
1880 stb_p(buf, value);
1881 break;
1882 case 2:
1883 stw_p(buf, value);
1884 break;
1885 case 4:
1886 stl_p(buf, value);
1887 break;
1888 case 8:
1889 stq_p(buf, value);
1890 break;
1891 default:
1892 abort();
1894 address_space_write(subpage->as, addr + subpage->base, buf, len);
1897 static bool subpage_accepts(void *opaque, hwaddr addr,
1898 unsigned len, bool is_write)
1900 subpage_t *subpage = opaque;
1901 #if defined(DEBUG_SUBPAGE)
1902 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1903 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1904 #endif
1906 return address_space_access_valid(subpage->as, addr + subpage->base,
1907 len, is_write);
1910 static const MemoryRegionOps subpage_ops = {
1911 .read = subpage_read,
1912 .write = subpage_write,
1913 .impl.min_access_size = 1,
1914 .impl.max_access_size = 8,
1915 .valid.min_access_size = 1,
1916 .valid.max_access_size = 8,
1917 .valid.accepts = subpage_accepts,
1918 .endianness = DEVICE_NATIVE_ENDIAN,
1921 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1922 uint16_t section)
1924 int idx, eidx;
1926 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1927 return -1;
1928 idx = SUBPAGE_IDX(start);
1929 eidx = SUBPAGE_IDX(end);
1930 #if defined(DEBUG_SUBPAGE)
1931 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1932 __func__, mmio, start, end, idx, eidx, section);
1933 #endif
1934 for (; idx <= eidx; idx++) {
1935 mmio->sub_section[idx] = section;
1938 return 0;
1941 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1943 subpage_t *mmio;
1945 mmio = g_malloc0(sizeof(subpage_t));
1947 mmio->as = as;
1948 mmio->base = base;
1949 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1950 NULL, TARGET_PAGE_SIZE);
1951 mmio->iomem.subpage = true;
1952 #if defined(DEBUG_SUBPAGE)
1953 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1954 mmio, base, TARGET_PAGE_SIZE);
1955 #endif
1956 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1958 return mmio;
1961 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1962 MemoryRegion *mr)
1964 assert(as);
1965 MemoryRegionSection section = {
1966 .address_space = as,
1967 .mr = mr,
1968 .offset_within_address_space = 0,
1969 .offset_within_region = 0,
1970 .size = int128_2_64(),
1973 return phys_section_add(map, &section);
1976 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
1978 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
1979 MemoryRegionSection *sections = d->map.sections;
1981 return sections[index & ~TARGET_PAGE_MASK].mr;
1984 static void io_mem_init(void)
1986 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1987 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1988 NULL, UINT64_MAX);
1989 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1990 NULL, UINT64_MAX);
1991 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1992 NULL, UINT64_MAX);
1995 static void mem_begin(MemoryListener *listener)
1997 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1998 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1999 uint16_t n;
2001 n = dummy_section(&d->map, as, &io_mem_unassigned);
2002 assert(n == PHYS_SECTION_UNASSIGNED);
2003 n = dummy_section(&d->map, as, &io_mem_notdirty);
2004 assert(n == PHYS_SECTION_NOTDIRTY);
2005 n = dummy_section(&d->map, as, &io_mem_rom);
2006 assert(n == PHYS_SECTION_ROM);
2007 n = dummy_section(&d->map, as, &io_mem_watch);
2008 assert(n == PHYS_SECTION_WATCH);
2010 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2011 d->as = as;
2012 as->next_dispatch = d;
2015 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2017 phys_sections_free(&d->map);
2018 g_free(d);
2021 static void mem_commit(MemoryListener *listener)
2023 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2024 AddressSpaceDispatch *cur = as->dispatch;
2025 AddressSpaceDispatch *next = as->next_dispatch;
2027 phys_page_compact_all(next, next->map.nodes_nb);
2029 atomic_rcu_set(&as->dispatch, next);
2030 if (cur) {
2031 call_rcu(cur, address_space_dispatch_free, rcu);
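/* This is the writer side of the RCU scheme: the new dispatch is
 * published with atomic_rcu_set() and the old one is reclaimed only
 * after a grace period via call_rcu().  The matching reader side, as in
 * address_space_translate() above, is schematically:
 *
 *   rcu_read_lock();
 *   d = atomic_rcu_read(&as->dispatch);
 *   section = address_space_translate_internal(d, addr, &xlat, &plen, true);
 *   ... use section->mr ...
 *   rcu_read_unlock();
 *
 * so a reader sees either the old or the new AddressSpaceDispatch in its
 * entirety, and neither is freed while a reader may still hold a pointer
 * into it.
 */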
2035 static void tcg_commit(MemoryListener *listener)
2037 CPUState *cpu;
2039 /* since each CPU stores ram addresses in its TLB cache, we must
2040 reset the modified entries */
2041 /* XXX: slow ! */
2042 CPU_FOREACH(cpu) {
2043 /* FIXME: Disentangle the cpu.h circular files deps so we can
2044 directly get the right CPU from listener. */
2045 if (cpu->tcg_as_listener != listener) {
2046 continue;
2048 cpu_reload_memory_map(cpu);
2052 static void core_log_global_start(MemoryListener *listener)
2054 cpu_physical_memory_set_dirty_tracking(true);
2057 static void core_log_global_stop(MemoryListener *listener)
2059 cpu_physical_memory_set_dirty_tracking(false);
2062 static MemoryListener core_memory_listener = {
2063 .log_global_start = core_log_global_start,
2064 .log_global_stop = core_log_global_stop,
2065 .priority = 1,
2068 void address_space_init_dispatch(AddressSpace *as)
2070 as->dispatch = NULL;
2071 as->dispatch_listener = (MemoryListener) {
2072 .begin = mem_begin,
2073 .commit = mem_commit,
2074 .region_add = mem_add,
2075 .region_nop = mem_add,
2076 .priority = 0,
2078 memory_listener_register(&as->dispatch_listener, as);
2081 void address_space_unregister(AddressSpace *as)
2083 memory_listener_unregister(&as->dispatch_listener);
2086 void address_space_destroy_dispatch(AddressSpace *as)
2088 AddressSpaceDispatch *d = as->dispatch;
2090 atomic_rcu_set(&as->dispatch, NULL);
2091 if (d) {
2092 call_rcu(d, address_space_dispatch_free, rcu);
2096 static void memory_map_init(void)
2098 system_memory = g_malloc(sizeof(*system_memory));
2100 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2101 address_space_init(&address_space_memory, system_memory, "memory");
2103 system_io = g_malloc(sizeof(*system_io));
2104 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2105 65536);
2106 address_space_init(&address_space_io, system_io, "I/O");
2108 memory_listener_register(&core_memory_listener, &address_space_memory);
2111 MemoryRegion *get_system_memory(void)
2113 return system_memory;
2116 MemoryRegion *get_system_io(void)
2118 return system_io;
2121 #endif /* !defined(CONFIG_USER_ONLY) */
2123 /* physical memory access (slow version, mainly for debug) */
2124 #if defined(CONFIG_USER_ONLY)
2125 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2126 uint8_t *buf, int len, int is_write)
2128 int l, flags;
2129 target_ulong page;
2130 void * p;
2132 while (len > 0) {
2133 page = addr & TARGET_PAGE_MASK;
2134 l = (page + TARGET_PAGE_SIZE) - addr;
2135 if (l > len)
2136 l = len;
2137 flags = page_get_flags(page);
2138 if (!(flags & PAGE_VALID))
2139 return -1;
2140 if (is_write) {
2141 if (!(flags & PAGE_WRITE))
2142 return -1;
2143 /* XXX: this code should not depend on lock_user */
2144 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2145 return -1;
2146 memcpy(p, buf, l);
2147 unlock_user(p, addr, l);
2148 } else {
2149 if (!(flags & PAGE_READ))
2150 return -1;
2151 /* XXX: this code should not depend on lock_user */
2152 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2153 return -1;
2154 memcpy(buf, p, l);
2155 unlock_user(p, addr, 0);
2157 len -= l;
2158 buf += l;
2159 addr += l;
2161 return 0;
2164 #else
2166 static void invalidate_and_set_dirty(hwaddr addr,
2167 hwaddr length)
2169 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2170 tb_invalidate_phys_range(addr, addr + length, 0);
2171 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2173 xen_modified_memory(addr, length);
2176 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2178 unsigned access_size_max = mr->ops->valid.max_access_size;
2180 /* Regions are assumed to support 1-4 byte accesses unless
2181 otherwise specified. */
2182 if (access_size_max == 0) {
2183 access_size_max = 4;
2186 /* Bound the maximum access by the alignment of the address. */
2187 if (!mr->ops->impl.unaligned) {
2188 unsigned align_size_max = addr & -addr;
2189 if (align_size_max != 0 && align_size_max < access_size_max) {
2190 access_size_max = align_size_max;
2194 /* Don't attempt accesses larger than the maximum. */
2195 if (l > access_size_max) {
2196 l = access_size_max;
2198 if (l & (l - 1)) {
2199 l = 1 << (qemu_fls(l) - 1);
2202 return l;
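/*
 * Worked example (for illustration): with max_access_size 4 and an 8-byte
 * request at address 0x1006, the address alignment (0x1006 & -0x1006 == 2)
 * lowers the allowed maximum from 4 to 2, so the first access issued is
 * 2 bytes; the remaining bytes are handled by subsequent iterations of
 * the caller's loop.
 */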
2205 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2206 int len, bool is_write)
2208 hwaddr l;
2209 uint8_t *ptr;
2210 uint64_t val;
2211 hwaddr addr1;
2212 MemoryRegion *mr;
2213 bool error = false;
2215 while (len > 0) {
2216 l = len;
2217 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2219 if (is_write) {
2220 if (!memory_access_is_direct(mr, is_write)) {
2221 l = memory_access_size(mr, l, addr1);
2222 /* XXX: could force current_cpu to NULL to avoid
2223 potential bugs */
2224 switch (l) {
2225 case 8:
2226 /* 64 bit write access */
2227 val = ldq_p(buf);
2228 error |= io_mem_write(mr, addr1, val, 8);
2229 break;
2230 case 4:
2231 /* 32 bit write access */
2232 val = ldl_p(buf);
2233 error |= io_mem_write(mr, addr1, val, 4);
2234 break;
2235 case 2:
2236 /* 16 bit write access */
2237 val = lduw_p(buf);
2238 error |= io_mem_write(mr, addr1, val, 2);
2239 break;
2240 case 1:
2241 /* 8 bit write access */
2242 val = ldub_p(buf);
2243 error |= io_mem_write(mr, addr1, val, 1);
2244 break;
2245 default:
2246 abort();
2248 } else {
2249 addr1 += memory_region_get_ram_addr(mr);
2250 /* RAM case */
2251 ptr = qemu_get_ram_ptr(addr1);
2252 memcpy(ptr, buf, l);
2253 invalidate_and_set_dirty(addr1, l);
2255 } else {
2256 if (!memory_access_is_direct(mr, is_write)) {
2257 /* I/O case */
2258 l = memory_access_size(mr, l, addr1);
2259 switch (l) {
2260 case 8:
2261 /* 64 bit read access */
2262 error |= io_mem_read(mr, addr1, &val, 8);
2263 stq_p(buf, val);
2264 break;
2265 case 4:
2266 /* 32 bit read access */
2267 error |= io_mem_read(mr, addr1, &val, 4);
2268 stl_p(buf, val);
2269 break;
2270 case 2:
2271 /* 16 bit read access */
2272 error |= io_mem_read(mr, addr1, &val, 2);
2273 stw_p(buf, val);
2274 break;
2275 case 1:
2276 /* 8 bit read access */
2277 error |= io_mem_read(mr, addr1, &val, 1);
2278 stb_p(buf, val);
2279 break;
2280 default:
2281 abort();
2283 } else {
2284 /* RAM case */
2285 ptr = qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + addr1);
2286 memcpy(buf, ptr, l);
2289 len -= l;
2290 buf += l;
2291 addr += l;
2294 return error;
2297 bool address_space_write(AddressSpace *as, hwaddr addr,
2298 const uint8_t *buf, int len)
2300 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2303 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2305 return address_space_rw(as, addr, buf, len, false);
2309 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2310 int len, int is_write)
2312 address_space_rw(&address_space_memory, addr, buf, len, is_write);
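/*
 * Usage sketch (illustrative only): a device model copying a small,
 * hypothetical descriptor out of guest memory and writing a status byte
 * back.  The addresses are made up; real callers get them from
 * guest-programmed registers.  The accesses cross RAM and MMIO
 * transparently via address_space_translate().
 */
#if 0
static void example_dma_round_trip(hwaddr desc_pa, hwaddr status_pa)
{
    uint8_t desc[16];
    uint8_t status = 0x01;

    /* Pull the descriptor out of guest memory. */
    address_space_read(&address_space_memory, desc_pa, desc, sizeof(desc));

    /* ... interpret the descriptor ... */

    /* Report completion back to the guest. */
    address_space_write(&address_space_memory, status_pa, &status, 1);
}
#endif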
2315 enum write_rom_type {
2316 WRITE_DATA,
2317 FLUSH_CACHE,
2320 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2321 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2323 hwaddr l;
2324 uint8_t *ptr;
2325 hwaddr addr1;
2326 MemoryRegion *mr;
2328 while (len > 0) {
2329 l = len;
2330 mr = address_space_translate(as, addr, &addr1, &l, true);
2332 if (!(memory_region_is_ram(mr) ||
2333 memory_region_is_romd(mr))) {
2334 /* do nothing */
2335 } else {
2336 addr1 += memory_region_get_ram_addr(mr);
2337 /* ROM/RAM case */
2338 ptr = qemu_get_ram_ptr(addr1);
2339 switch (type) {
2340 case WRITE_DATA:
2341 memcpy(ptr, buf, l);
2342 invalidate_and_set_dirty(addr1, l);
2343 break;
2344 case FLUSH_CACHE:
2345 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2346 break;
2349 len -= l;
2350 buf += l;
2351 addr += l;
2355 /* used for ROM loading: can write in RAM and ROM */
2356 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2357 const uint8_t *buf, int len)
2359 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2362 void cpu_flush_icache_range(hwaddr start, int len)
2365 * This function should do the same thing as an icache flush that was
2366 * triggered from within the guest. For TCG we are always cache coherent,
2367 * so there is no need to flush anything. For KVM / Xen we need to flush
2368 * at least the host's instruction cache.
2370 if (tcg_enabled()) {
2371 return;
2374 cpu_physical_memory_write_rom_internal(&address_space_memory,
2375 start, NULL, len, FLUSH_CACHE);
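/*
 * Usage sketch (illustrative only): after installing executable code into
 * guest memory from the host side, flush the corresponding range so a
 * KVM or Xen guest does not execute stale instructions.
 */
#if 0
static void example_install_code(AddressSpace *as, hwaddr dst,
                                 const uint8_t *code, int size)
{
    cpu_physical_memory_write_rom(as, dst, code, size);
    cpu_flush_icache_range(dst, size);
}
#endif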
2378 typedef struct {
2379 MemoryRegion *mr;
2380 void *buffer;
2381 hwaddr addr;
2382 hwaddr len;
2383 } BounceBuffer;
2385 static BounceBuffer bounce;
2387 typedef struct MapClient {
2388 void *opaque;
2389 void (*callback)(void *opaque);
2390 QLIST_ENTRY(MapClient) link;
2391 } MapClient;
2393 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2394 = QLIST_HEAD_INITIALIZER(map_client_list);
2396 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2398 MapClient *client = g_malloc(sizeof(*client));
2400 client->opaque = opaque;
2401 client->callback = callback;
2402 QLIST_INSERT_HEAD(&map_client_list, client, link);
2403 return client;
2406 static void cpu_unregister_map_client(void *_client)
2408 MapClient *client = (MapClient *)_client;
2410 QLIST_REMOVE(client, link);
2411 g_free(client);
2414 static void cpu_notify_map_clients(void)
2416 MapClient *client;
2418 while (!QLIST_EMPTY(&map_client_list)) {
2419 client = QLIST_FIRST(&map_client_list);
2420 client->callback(client->opaque);
2421 cpu_unregister_map_client(client);
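/*
 * Usage sketch (illustrative only): if address_space_map() fails because
 * the single bounce buffer is busy, a caller can queue itself with
 * cpu_register_map_client() and retry from the callback, which fires the
 * next time the bounce buffer is released.  ExampleMapReq is a made-up
 * request type.
 */
#if 0
typedef struct ExampleMapReq {
    AddressSpace *as;
    hwaddr addr;
    hwaddr len;
    bool is_write;
} ExampleMapReq;

static void example_try_map(void *opaque)
{
    ExampleMapReq *req = opaque;
    hwaddr len = req->len;
    void *p = address_space_map(req->as, req->addr, &len, req->is_write);

    if (!p) {
        /* Still busy: wait for the next cpu_notify_map_clients(). */
        cpu_register_map_client(req, example_try_map);
        return;
    }
    /* ... use p for up to len bytes, then address_space_unmap() ... */
}
#endif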
2425 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2427 MemoryRegion *mr;
2428 hwaddr l, xlat;
2430 while (len > 0) {
2431 l = len;
2432 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2433 if (!memory_access_is_direct(mr, is_write)) {
2434 l = memory_access_size(mr, l, addr);
2435 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2436 return false;
2440 len -= l;
2441 addr += l;
2443 return true;
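/*
 * Usage sketch (illustrative only): probe whether a guest-supplied
 * address is backed by something that would accept a 4-byte read before
 * actually issuing the access.
 */
#if 0
static bool example_safe_read_word(hwaddr addr, uint32_t *out)
{
    if (!address_space_access_valid(&address_space_memory, addr,
                                    sizeof(*out), false)) {
        return false;   /* would hit an unassigned or invalid region */
    }
    address_space_read(&address_space_memory, addr, (uint8_t *)out,
                       sizeof(*out));
    return true;
}
#endif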
2446 /* Map a physical memory region into a host virtual address.
2447 * May map a subset of the requested range, given by and returned in *plen.
2448 * May return NULL if resources needed to perform the mapping are exhausted.
2449 * Use only for reads OR writes - not for read-modify-write operations.
2450 * Use cpu_register_map_client() to know when retrying the map operation is
2451 * likely to succeed.
2453 void *address_space_map(AddressSpace *as,
2454 hwaddr addr,
2455 hwaddr *plen,
2456 bool is_write)
2458 hwaddr len = *plen;
2459 hwaddr done = 0;
2460 hwaddr l, xlat, base;
2461 MemoryRegion *mr, *this_mr;
2462 ram_addr_t raddr;
2464 if (len == 0) {
2465 return NULL;
2468 l = len;
2469 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2470 if (!memory_access_is_direct(mr, is_write)) {
2471 if (bounce.buffer) {
2472 return NULL;
2474 /* Avoid unbounded allocations */
2475 l = MIN(l, TARGET_PAGE_SIZE);
2476 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2477 bounce.addr = addr;
2478 bounce.len = l;
2480 memory_region_ref(mr);
2481 bounce.mr = mr;
2482 if (!is_write) {
2483 address_space_read(as, addr, bounce.buffer, l);
2486 *plen = l;
2487 return bounce.buffer;
2490 base = xlat;
2491 raddr = memory_region_get_ram_addr(mr);
2493 for (;;) {
2494 len -= l;
2495 addr += l;
2496 done += l;
2497 if (len == 0) {
2498 break;
2501 l = len;
2502 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2503 if (this_mr != mr || xlat != base + done) {
2504 break;
2508 memory_region_ref(mr);
2509 *plen = done;
2510 return qemu_ram_ptr_length(raddr + base, plen);
2513 /* Unmaps a memory region previously mapped by address_space_map().
2514 * Will also mark the memory as dirty if is_write == 1. access_len gives
2515 * the amount of memory that was actually read or written by the caller.
2517 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2518 int is_write, hwaddr access_len)
2520 if (buffer != bounce.buffer) {
2521 MemoryRegion *mr;
2522 ram_addr_t addr1;
2524 mr = qemu_ram_addr_from_host(buffer, &addr1);
2525 assert(mr != NULL);
2526 if (is_write) {
2527 invalidate_and_set_dirty(addr1, access_len);
2529 if (xen_enabled()) {
2530 xen_invalidate_map_cache_entry(buffer);
2532 memory_region_unref(mr);
2533 return;
2535 if (is_write) {
2536 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2538 qemu_vfree(bounce.buffer);
2539 bounce.buffer = NULL;
2540 memory_region_unref(bounce.mr);
2541 cpu_notify_map_clients();
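/*
 * Usage sketch (illustrative only): zero-copy access to a guest buffer.
 * address_space_map() may shorten the mapping, and the length actually
 * touched must be reported back to address_space_unmap() so dirty
 * tracking and bounce-buffer write-back stay accurate.
 */
#if 0
static void example_fill_guest_buffer(AddressSpace *as, hwaddr addr,
                                      hwaddr size)
{
    hwaddr len = size;
    void *p = address_space_map(as, addr, &len, true);

    if (!p) {
        return; /* resources exhausted; see cpu_register_map_client() */
    }
    memset(p, 0, len);                  /* write only the mapped length */
    address_space_unmap(as, p, len, true, len);
}
#endif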
2544 void *cpu_physical_memory_map(hwaddr addr,
2545 hwaddr *plen,
2546 int is_write)
2548 return address_space_map(&address_space_memory, addr, plen, is_write);
2551 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2552 int is_write, hwaddr access_len)
2554 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2557 /* warning: addr must be aligned */
2558 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2559 enum device_endian endian)
2561 uint8_t *ptr;
2562 uint64_t val;
2563 MemoryRegion *mr;
2564 hwaddr l = 4;
2565 hwaddr addr1;
2567 mr = address_space_translate(as, addr, &addr1, &l, false);
2568 if (l < 4 || !memory_access_is_direct(mr, false)) {
2569 /* I/O case */
2570 io_mem_read(mr, addr1, &val, 4);
2571 #if defined(TARGET_WORDS_BIGENDIAN)
2572 if (endian == DEVICE_LITTLE_ENDIAN) {
2573 val = bswap32(val);
2575 #else
2576 if (endian == DEVICE_BIG_ENDIAN) {
2577 val = bswap32(val);
2579 #endif
2580 } else {
2581 /* RAM case */
2582 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2583 & TARGET_PAGE_MASK)
2584 + addr1);
2585 switch (endian) {
2586 case DEVICE_LITTLE_ENDIAN:
2587 val = ldl_le_p(ptr);
2588 break;
2589 case DEVICE_BIG_ENDIAN:
2590 val = ldl_be_p(ptr);
2591 break;
2592 default:
2593 val = ldl_p(ptr);
2594 break;
2597 return val;
2600 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2602 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2605 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2607 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2610 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2612 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
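/*
 * Usage sketch (illustrative only): a 32-bit field documented as
 * little-endian (e.g. a descriptor in guest RAM) is read with the _le_
 * accessor so it is byte-swapped correctly even on big-endian targets;
 * plain ldl_phys() would instead apply the target's native byte order.
 */
#if 0
static uint32_t example_read_le32_field(AddressSpace *as, hwaddr field_pa)
{
    return ldl_le_phys(as, field_pa);
}
#endif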
2615 /* warning: addr must be aligned */
2616 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2617 enum device_endian endian)
2619 uint8_t *ptr;
2620 uint64_t val;
2621 MemoryRegion *mr;
2622 hwaddr l = 8;
2623 hwaddr addr1;
2625 mr = address_space_translate(as, addr, &addr1, &l,
2626 false);
2627 if (l < 8 || !memory_access_is_direct(mr, false)) {
2628 /* I/O case */
2629 io_mem_read(mr, addr1, &val, 8);
2630 #if defined(TARGET_WORDS_BIGENDIAN)
2631 if (endian == DEVICE_LITTLE_ENDIAN) {
2632 val = bswap64(val);
2634 #else
2635 if (endian == DEVICE_BIG_ENDIAN) {
2636 val = bswap64(val);
2638 #endif
2639 } else {
2640 /* RAM case */
2641 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2642 & TARGET_PAGE_MASK)
2643 + addr1);
2644 switch (endian) {
2645 case DEVICE_LITTLE_ENDIAN:
2646 val = ldq_le_p(ptr);
2647 break;
2648 case DEVICE_BIG_ENDIAN:
2649 val = ldq_be_p(ptr);
2650 break;
2651 default:
2652 val = ldq_p(ptr);
2653 break;
2656 return val;
2659 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2661 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2664 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2666 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2669 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2671 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2674 /* XXX: optimize */
2675 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2677 uint8_t val;
2678 address_space_rw(as, addr, &val, 1, 0);
2679 return val;
2682 /* warning: addr must be aligned */
2683 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2684 enum device_endian endian)
2686 uint8_t *ptr;
2687 uint64_t val;
2688 MemoryRegion *mr;
2689 hwaddr l = 2;
2690 hwaddr addr1;
2692 mr = address_space_translate(as, addr, &addr1, &l,
2693 false);
2694 if (l < 2 || !memory_access_is_direct(mr, false)) {
2695 /* I/O case */
2696 io_mem_read(mr, addr1, &val, 2);
2697 #if defined(TARGET_WORDS_BIGENDIAN)
2698 if (endian == DEVICE_LITTLE_ENDIAN) {
2699 val = bswap16(val);
2701 #else
2702 if (endian == DEVICE_BIG_ENDIAN) {
2703 val = bswap16(val);
2705 #endif
2706 } else {
2707 /* RAM case */
2708 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2709 & TARGET_PAGE_MASK)
2710 + addr1);
2711 switch (endian) {
2712 case DEVICE_LITTLE_ENDIAN:
2713 val = lduw_le_p(ptr);
2714 break;
2715 case DEVICE_BIG_ENDIAN:
2716 val = lduw_be_p(ptr);
2717 break;
2718 default:
2719 val = lduw_p(ptr);
2720 break;
2723 return val;
2726 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2728 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2731 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2733 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2736 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2738 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2741 /* warning: addr must be aligned. The RAM page is not marked as dirty
2742 and the code inside is not invalidated. This is useful when the dirty
2743 bits are used to track modified PTEs. */
2744 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2746 uint8_t *ptr;
2747 MemoryRegion *mr;
2748 hwaddr l = 4;
2749 hwaddr addr1;
2751 mr = address_space_translate(as, addr, &addr1, &l,
2752 true);
2753 if (l < 4 || !memory_access_is_direct(mr, true)) {
2754 io_mem_write(mr, addr1, val, 4);
2755 } else {
2756 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2757 ptr = qemu_get_ram_ptr(addr1);
2758 stl_p(ptr, val);
2760 if (unlikely(in_migration)) {
2761 if (cpu_physical_memory_is_clean(addr1)) {
2762 /* invalidate code */
2763 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2764 /* set dirty bit */
2765 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
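/*
 * Usage sketch (illustrative only): target MMU emulation updating an
 * accessed/dirty flag inside a guest page-table entry.  The _notdirty
 * variant skips the usual dirty/self-modifying-code handling for what is
 * purely bookkeeping on the PTE itself (migration still gets its dirty
 * bit, as handled above).
 */
#if 0
static void example_set_pte_flag(AddressSpace *as, hwaddr pte_pa,
                                 uint32_t pte, uint32_t flag)
{
    stl_phys_notdirty(as, pte_pa, pte | flag);
}
#endif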
2771 /* warning: addr must be aligned */
2772 static inline void stl_phys_internal(AddressSpace *as,
2773 hwaddr addr, uint32_t val,
2774 enum device_endian endian)
2776 uint8_t *ptr;
2777 MemoryRegion *mr;
2778 hwaddr l = 4;
2779 hwaddr addr1;
2781 mr = address_space_translate(as, addr, &addr1, &l,
2782 true);
2783 if (l < 4 || !memory_access_is_direct(mr, true)) {
2784 #if defined(TARGET_WORDS_BIGENDIAN)
2785 if (endian == DEVICE_LITTLE_ENDIAN) {
2786 val = bswap32(val);
2788 #else
2789 if (endian == DEVICE_BIG_ENDIAN) {
2790 val = bswap32(val);
2792 #endif
2793 io_mem_write(mr, addr1, val, 4);
2794 } else {
2795 /* RAM case */
2796 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2797 ptr = qemu_get_ram_ptr(addr1);
2798 switch (endian) {
2799 case DEVICE_LITTLE_ENDIAN:
2800 stl_le_p(ptr, val);
2801 break;
2802 case DEVICE_BIG_ENDIAN:
2803 stl_be_p(ptr, val);
2804 break;
2805 default:
2806 stl_p(ptr, val);
2807 break;
2809 invalidate_and_set_dirty(addr1, 4);
2813 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2815 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2818 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2820 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2823 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2825 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2828 /* XXX: optimize */
2829 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2831 uint8_t v = val;
2832 address_space_rw(as, addr, &v, 1, 1);
2835 /* warning: addr must be aligned */
2836 static inline void stw_phys_internal(AddressSpace *as,
2837 hwaddr addr, uint32_t val,
2838 enum device_endian endian)
2840 uint8_t *ptr;
2841 MemoryRegion *mr;
2842 hwaddr l = 2;
2843 hwaddr addr1;
2845 mr = address_space_translate(as, addr, &addr1, &l, true);
2846 if (l < 2 || !memory_access_is_direct(mr, true)) {
2847 #if defined(TARGET_WORDS_BIGENDIAN)
2848 if (endian == DEVICE_LITTLE_ENDIAN) {
2849 val = bswap16(val);
2851 #else
2852 if (endian == DEVICE_BIG_ENDIAN) {
2853 val = bswap16(val);
2855 #endif
2856 io_mem_write(mr, addr1, val, 2);
2857 } else {
2858 /* RAM case */
2859 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2860 ptr = qemu_get_ram_ptr(addr1);
2861 switch (endian) {
2862 case DEVICE_LITTLE_ENDIAN:
2863 stw_le_p(ptr, val);
2864 break;
2865 case DEVICE_BIG_ENDIAN:
2866 stw_be_p(ptr, val);
2867 break;
2868 default:
2869 stw_p(ptr, val);
2870 break;
2872 invalidate_and_set_dirty(addr1, 2);
2876 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2878 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2881 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2883 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2886 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2888 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2891 /* XXX: optimize */
2892 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2894 val = tswap64(val);
2895 address_space_rw(as, addr, (void *) &val, 8, 1);
2898 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2900 val = cpu_to_le64(val);
2901 address_space_rw(as, addr, (void *) &val, 8, 1);
2904 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2906 val = cpu_to_be64(val);
2907 address_space_rw(as, addr, (void *) &val, 8, 1);
2910 /* virtual memory access for debug (includes writing to ROM) */
2911 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2912 uint8_t *buf, int len, int is_write)
2914 int l;
2915 hwaddr phys_addr;
2916 target_ulong page;
2918 while (len > 0) {
2919 page = addr & TARGET_PAGE_MASK;
2920 phys_addr = cpu_get_phys_page_debug(cpu, page);
2921 /* if no physical page mapped, return an error */
2922 if (phys_addr == -1)
2923 return -1;
2924 l = (page + TARGET_PAGE_SIZE) - addr;
2925 if (l > len)
2926 l = len;
2927 phys_addr += (addr & ~TARGET_PAGE_MASK);
2928 if (is_write) {
2929 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2930 } else {
2931 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2933 len -= l;
2934 buf += l;
2935 addr += l;
2937 return 0;
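/*
 * Usage sketch (illustrative only): a debugger-style peek at guest
 * virtual memory, e.g. from the gdb stub.  Reads resolve the mapping via
 * cpu_get_phys_page_debug(), so no guest-visible fault is raised, and
 * writes go through cpu_physical_memory_write_rom() so breakpoints can
 * be patched even into ROM.
 */
#if 0
static bool example_debug_peek(CPUState *cpu, target_ulong vaddr,
                               uint8_t *out, int size)
{
    return cpu_memory_rw_debug(cpu, vaddr, out, size, 0) == 0;
}
#endif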
2939 #endif
2942 * A helper function for the _utterly broken_ virtio device model to find out if
2943 * it's running on a big endian machine. Don't do this at home kids!
2945 bool target_words_bigendian(void);
2946 bool target_words_bigendian(void)
2948 #if defined(TARGET_WORDS_BIGENDIAN)
2949 return true;
2950 #else
2951 return false;
2952 #endif
2955 #ifndef CONFIG_USER_ONLY
2956 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2958 MemoryRegion *mr;
2959 hwaddr l = 1;
2961 mr = address_space_translate(&address_space_memory,
2962 phys_addr, &phys_addr, &l, false);
2964 return !(memory_region_is_ram(mr) ||
2965 memory_region_is_romd(mr));
2968 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2970 RAMBlock *block;
2972 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2973 func(block->host, block->offset, block->used_length, opaque);
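/*
 * Usage sketch (illustrative only): summing the used size of all RAM
 * blocks with a caller-supplied iterator.  The callback signature is
 * assumed to match RAMBlockIterFunc as invoked above (host pointer,
 * offset, length, opaque).
 */
#if 0
static void example_add_block_size(void *host, ram_addr_t offset,
                                   ram_addr_t length, void *opaque)
{
    uint64_t *total = opaque;

    *total += length;
}

static uint64_t example_total_ram_size(void)
{
    uint64_t total = 0;

    qemu_ram_foreach_block(example_add_block_size, &total);
    return total;
}
#endif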
2976 #endif