qemu-img: Plug memory leak on block option help error path
[qemu/qmp-unstable.git] / exec.c
blob4e179a6f66e736e7d8741ee3b568945dd02cd04c
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
55 #include "qemu/range.h"
57 //#define DEBUG_SUBPAGE
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
73 #endif
75 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
76 /* current CPU in the current thread. It is only valid inside
77 cpu_exec() */
78 DEFINE_TLS(CPUState *, current_cpu);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
82 int use_icount;
84 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/*
 * One slot of the physical page radix tree: a 6-bit skip count and a
 * 26-bit index share a single 32-bit bitfield unit.
 */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

/* All-ones value of the 26-bit ptr field; marks "no node allocated". */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

/* Number of tree levels needed to cover the page-number bits. */
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

/* One tree node: a table of P_L2_SIZE entries. */
typedef PhysPageEntry Node[P_L2_SIZE];
107 typedef struct PhysPageMap {
108 unsigned sections_nb;
109 unsigned sections_nb_alloc;
110 unsigned nodes_nb;
111 unsigned nodes_nb_alloc;
112 Node *nodes;
113 MemoryRegionSection *sections;
114 } PhysPageMap;
116 struct AddressSpaceDispatch {
117 /* This is a multi-level map on the physical address space.
118 * The bottom level has pointers to MemoryRegionSections.
120 PhysPageEntry phys_map;
121 PhysPageMap map;
122 AddressSpace *as;
125 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
126 typedef struct subpage_t {
127 MemoryRegion iomem;
128 AddressSpace *as;
129 hwaddr base;
130 uint16_t sub_section[TARGET_PAGE_SIZE];
131 } subpage_t;
133 #define PHYS_SECTION_UNASSIGNED 0
134 #define PHYS_SECTION_NOTDIRTY 1
135 #define PHYS_SECTION_ROM 2
136 #define PHYS_SECTION_WATCH 3
138 static void io_mem_init(void);
139 static void memory_map_init(void);
140 static void tcg_commit(MemoryListener *listener);
142 static MemoryRegion io_mem_watch;
143 #endif
145 #if !defined(CONFIG_USER_ONLY)
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
158 unsigned i;
159 uint32_t ret;
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
168 return ret;
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
173 int level)
175 PhysPageEntry *p;
176 int i;
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
182 if (level == 0) {
183 for (i = 0; i < P_L2_SIZE; i++) {
184 p[i].skip = 0;
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
188 } else {
189 p = map->nodes[lp->ptr];
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
195 lp->skip = 0;
196 lp->ptr = leaf;
197 *index += step;
198 *nb -= step;
199 } else {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
202 ++lp;
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
208 uint16_t leaf)
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
216 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
217 * and update our entry so we can skip it and go directly to the destination.
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
221 unsigned valid_ptr = P_L2_SIZE;
222 int valid = 0;
223 PhysPageEntry *p;
224 int i;
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
227 return;
230 p = nodes[lp->ptr];
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
233 continue;
236 valid_ptr = i;
237 valid++;
238 if (p[i].skip) {
239 phys_page_compact(&p[i], nodes, compacted);
243 /* We can only compress if there's only one child. */
244 if (valid != 1) {
245 return;
248 assert(valid_ptr < P_L2_SIZE);
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
252 return;
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
261 * change this rule.
263 lp->skip = 0;
264 } else {
265 lp->skip += p[valid_ptr].skip;
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
271 DECLARE_BITMAP(compacted, nodes_nb);
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
281 PhysPageEntry *p;
282 hwaddr index = addr >> TARGET_PAGE_BITS;
283 int i;
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
289 p = nodes[lp.ptr];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
297 } else {
298 return &sections[PHYS_SECTION_UNASSIGNED];
302 bool memory_region_is_unassigned(MemoryRegion *mr)
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
309 hwaddr addr,
310 bool resolve_subpage)
312 MemoryRegionSection *section;
313 subpage_t *subpage;
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
320 return section;
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
327 MemoryRegionSection *section;
328 Int128 diff;
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
339 return section;
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
344 if (memory_region_is_ram(mr)) {
345 return !(is_write && mr->readonly);
347 if (memory_region_is_romd(mr)) {
348 return !is_write;
351 return false;
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355 hwaddr *xlat, hwaddr *plen,
356 bool is_write)
358 IOMMUTLBEntry iotlb;
359 MemoryRegionSection *section;
360 MemoryRegion *mr;
361 hwaddr len = *plen;
363 for (;;) {
364 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
365 mr = section->mr;
367 if (!mr->iommu_ops) {
368 break;
371 iotlb = mr->iommu_ops->translate(mr, addr);
372 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373 | (addr & iotlb.addr_mask));
374 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
375 if (!(iotlb.perm & (1 << is_write))) {
376 mr = &io_mem_unassigned;
377 break;
380 as = iotlb.target_as;
383 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
384 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385 len = MIN(page, len);
388 *plen = len;
389 *xlat = addr;
390 return mr;
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
395 hwaddr *plen)
397 MemoryRegionSection *section;
398 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
400 assert(!section->mr->iommu_ops);
401 return section;
403 #endif
405 void cpu_exec_init_all(void)
407 #if !defined(CONFIG_USER_ONLY)
408 qemu_mutex_init(&ram_list.mutex);
409 memory_map_init();
410 io_mem_init();
411 #endif
414 #if !defined(CONFIG_USER_ONLY)
416 static int cpu_common_post_load(void *opaque, int version_id)
418 CPUState *cpu = opaque;
420 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421 version_id is increased. */
422 cpu->interrupt_request &= ~0x01;
423 tlb_flush(cpu, 1);
425 return 0;
428 const VMStateDescription vmstate_cpu_common = {
429 .name = "cpu_common",
430 .version_id = 1,
431 .minimum_version_id = 1,
432 .post_load = cpu_common_post_load,
433 .fields = (VMStateField[]) {
434 VMSTATE_UINT32(halted, CPUState),
435 VMSTATE_UINT32(interrupt_request, CPUState),
436 VMSTATE_END_OF_LIST()
440 #endif
442 CPUState *qemu_get_cpu(int index)
444 CPUState *cpu;
446 CPU_FOREACH(cpu) {
447 if (cpu->cpu_index == index) {
448 return cpu;
452 return NULL;
455 #if !defined(CONFIG_USER_ONLY)
456 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
458 /* We only support one address space per cpu at the moment. */
459 assert(cpu->as == as);
461 if (cpu->tcg_as_listener) {
462 memory_listener_unregister(cpu->tcg_as_listener);
463 } else {
464 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
466 cpu->tcg_as_listener->commit = tcg_commit;
467 memory_listener_register(cpu->tcg_as_listener, as);
469 #endif
471 void cpu_exec_init(CPUArchState *env)
473 CPUState *cpu = ENV_GET_CPU(env);
474 CPUClass *cc = CPU_GET_CLASS(cpu);
475 CPUState *some_cpu;
476 int cpu_index;
478 #if defined(CONFIG_USER_ONLY)
479 cpu_list_lock();
480 #endif
481 cpu_index = 0;
482 CPU_FOREACH(some_cpu) {
483 cpu_index++;
485 cpu->cpu_index = cpu_index;
486 cpu->numa_node = 0;
487 QTAILQ_INIT(&cpu->breakpoints);
488 QTAILQ_INIT(&cpu->watchpoints);
489 #ifndef CONFIG_USER_ONLY
490 cpu->as = &address_space_memory;
491 cpu->thread_id = qemu_get_thread_id();
492 #endif
493 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
494 #if defined(CONFIG_USER_ONLY)
495 cpu_list_unlock();
496 #endif
497 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
498 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
500 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
501 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
502 cpu_save, cpu_load, env);
503 assert(cc->vmsd == NULL);
504 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
505 #endif
506 if (cc->vmsd != NULL) {
507 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate translated code at 'pc'. */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/*
 * Invalidate translated code at 'pc', using the physical address obtained
 * from cpu_get_phys_page_debug().  Nothing is done when that lookup
 * returns -1.
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
    if (phys != -1) {
        tb_invalidate_phys_addr(cpu->as,
                                phys | (pc & ~TARGET_PAGE_MASK));
    }
}
#endif
#endif /* TARGET_HAS_ICE */
529 #if defined(CONFIG_USER_ONLY)
530 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
535 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
536 int flags, CPUWatchpoint **watchpoint)
538 return -ENOSYS;
540 #else
541 /* Add a watchpoint. */
542 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
543 int flags, CPUWatchpoint **watchpoint)
545 vaddr len_mask = ~(len - 1);
546 CPUWatchpoint *wp;
548 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
549 if ((len & (len - 1)) || (addr & ~len_mask) ||
550 len == 0 || len > TARGET_PAGE_SIZE) {
551 error_report("tried to set invalid watchpoint at %"
552 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
553 return -EINVAL;
555 wp = g_malloc(sizeof(*wp));
557 wp->vaddr = addr;
558 wp->len_mask = len_mask;
559 wp->flags = flags;
561 /* keep all GDB-injected watchpoints in front */
562 if (flags & BP_GDB) {
563 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
564 } else {
565 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
568 tlb_flush_page(cpu, addr);
570 if (watchpoint)
571 *watchpoint = wp;
572 return 0;
575 /* Remove a specific watchpoint. */
576 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
577 int flags)
579 vaddr len_mask = ~(len - 1);
580 CPUWatchpoint *wp;
582 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
583 if (addr == wp->vaddr && len_mask == wp->len_mask
584 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
585 cpu_watchpoint_remove_by_ref(cpu, wp);
586 return 0;
589 return -ENOENT;
592 /* Remove a specific watchpoint by reference. */
593 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
595 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
597 tlb_flush_page(cpu, watchpoint->vaddr);
599 g_free(watchpoint);
602 /* Remove all matching watchpoints. */
603 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
605 CPUWatchpoint *wp, *next;
607 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
608 if (wp->flags & mask) {
609 cpu_watchpoint_remove_by_ref(cpu, wp);
613 #endif
615 /* Add a breakpoint. */
616 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
617 CPUBreakpoint **breakpoint)
619 #if defined(TARGET_HAS_ICE)
620 CPUBreakpoint *bp;
622 bp = g_malloc(sizeof(*bp));
624 bp->pc = pc;
625 bp->flags = flags;
627 /* keep all GDB-injected breakpoints in front */
628 if (flags & BP_GDB) {
629 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
630 } else {
631 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
634 breakpoint_invalidate(cpu, pc);
636 if (breakpoint) {
637 *breakpoint = bp;
639 return 0;
640 #else
641 return -ENOSYS;
642 #endif
645 /* Remove a specific breakpoint. */
646 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
648 #if defined(TARGET_HAS_ICE)
649 CPUBreakpoint *bp;
651 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
652 if (bp->pc == pc && bp->flags == flags) {
653 cpu_breakpoint_remove_by_ref(cpu, bp);
654 return 0;
657 return -ENOENT;
658 #else
659 return -ENOSYS;
660 #endif
663 /* Remove a specific breakpoint by reference. */
664 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
666 #if defined(TARGET_HAS_ICE)
667 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
669 breakpoint_invalidate(cpu, breakpoint->pc);
671 g_free(breakpoint);
672 #endif
675 /* Remove all matching breakpoints. */
676 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
678 #if defined(TARGET_HAS_ICE)
679 CPUBreakpoint *bp, *next;
681 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
682 if (bp->flags & mask) {
683 cpu_breakpoint_remove_by_ref(cpu, bp);
686 #endif
689 /* enable or disable single step mode. EXCP_DEBUG is returned by the
690 CPU loop after each instruction */
691 void cpu_single_step(CPUState *cpu, int enabled)
693 #if defined(TARGET_HAS_ICE)
694 if (cpu->singlestep_enabled != enabled) {
695 cpu->singlestep_enabled = enabled;
696 if (kvm_enabled()) {
697 kvm_update_guest_debug(cpu, 0);
698 } else {
699 /* must flush all the translated code to avoid inconsistencies */
700 /* XXX: only flush what is necessary */
701 CPUArchState *env = cpu->env_ptr;
702 tb_flush(env);
705 #endif
708 void cpu_abort(CPUState *cpu, const char *fmt, ...)
710 va_list ap;
711 va_list ap2;
713 va_start(ap, fmt);
714 va_copy(ap2, ap);
715 fprintf(stderr, "qemu: fatal: ");
716 vfprintf(stderr, fmt, ap);
717 fprintf(stderr, "\n");
718 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
719 if (qemu_log_enabled()) {
720 qemu_log("qemu: fatal: ");
721 qemu_log_vprintf(fmt, ap2);
722 qemu_log("\n");
723 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
724 qemu_log_flush();
725 qemu_log_close();
727 va_end(ap2);
728 va_end(ap);
729 #if defined(CONFIG_USER_ONLY)
731 struct sigaction act;
732 sigfillset(&act.sa_mask);
733 act.sa_handler = SIG_DFL;
734 sigaction(SIGABRT, &act, NULL);
736 #endif
737 abort();
740 #if !defined(CONFIG_USER_ONLY)
741 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
743 RAMBlock *block;
745 /* The list is protected by the iothread lock here. */
746 block = ram_list.mru_block;
747 if (block && addr - block->offset < block->length) {
748 goto found;
750 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
751 if (addr - block->offset < block->length) {
752 goto found;
756 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
757 abort();
759 found:
760 ram_list.mru_block = block;
761 return block;
764 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
766 ram_addr_t start1;
767 RAMBlock *block;
768 ram_addr_t end;
770 end = TARGET_PAGE_ALIGN(start + length);
771 start &= TARGET_PAGE_MASK;
773 block = qemu_get_ram_block(start);
774 assert(block == qemu_get_ram_block(end - 1));
775 start1 = (uintptr_t)block->host + (start - block->offset);
776 cpu_tlb_reset_dirty_all(start1, length);
779 /* Note: start and end must be within the same ram block. */
780 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
781 unsigned client)
783 if (length == 0)
784 return;
785 cpu_physical_memory_clear_dirty_range(start, length, client);
787 if (tcg_enabled()) {
788 tlb_reset_dirty_range_all(start, length);
792 static void cpu_physical_memory_set_dirty_tracking(bool enable)
794 in_migration = enable;
797 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
798 MemoryRegionSection *section,
799 target_ulong vaddr,
800 hwaddr paddr, hwaddr xlat,
801 int prot,
802 target_ulong *address)
804 hwaddr iotlb;
805 CPUWatchpoint *wp;
807 if (memory_region_is_ram(section->mr)) {
808 /* Normal RAM. */
809 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
810 + xlat;
811 if (!section->readonly) {
812 iotlb |= PHYS_SECTION_NOTDIRTY;
813 } else {
814 iotlb |= PHYS_SECTION_ROM;
816 } else {
817 iotlb = section - section->address_space->dispatch->map.sections;
818 iotlb += xlat;
821 /* Make accesses to pages with watchpoints go via the
822 watchpoint trap routines. */
823 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
824 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
825 /* Avoid trapping reads of pages with a write breakpoint. */
826 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
827 iotlb = PHYS_SECTION_WATCH + paddr;
828 *address |= TLB_MMIO;
829 break;
834 return iotlb;
#endif /* !defined(CONFIG_USER_ONLY) */
838 #if !defined(CONFIG_USER_ONLY)
840 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
841 uint16_t section);
842 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
844 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
847 * Set a custom physical guest memory alloator.
848 * Accelerators with unusual needs may need this. Hopefully, we can
849 * get rid of it eventually.
851 void phys_mem_set_alloc(void *(*alloc)(size_t))
853 phys_mem_alloc = alloc;
856 static uint16_t phys_section_add(PhysPageMap *map,
857 MemoryRegionSection *section)
859 /* The physical section number is ORed with a page-aligned
860 * pointer to produce the iotlb entries. Thus it should
861 * never overflow into the page-aligned value.
863 assert(map->sections_nb < TARGET_PAGE_SIZE);
865 if (map->sections_nb == map->sections_nb_alloc) {
866 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
867 map->sections = g_renew(MemoryRegionSection, map->sections,
868 map->sections_nb_alloc);
870 map->sections[map->sections_nb] = *section;
871 memory_region_ref(section->mr);
872 return map->sections_nb++;
875 static void phys_section_destroy(MemoryRegion *mr)
877 memory_region_unref(mr);
879 if (mr->subpage) {
880 subpage_t *subpage = container_of(mr, subpage_t, iomem);
881 memory_region_destroy(&subpage->iomem);
882 g_free(subpage);
886 static void phys_sections_free(PhysPageMap *map)
888 while (map->sections_nb > 0) {
889 MemoryRegionSection *section = &map->sections[--map->sections_nb];
890 phys_section_destroy(section->mr);
892 g_free(map->sections);
893 g_free(map->nodes);
896 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
898 subpage_t *subpage;
899 hwaddr base = section->offset_within_address_space
900 & TARGET_PAGE_MASK;
901 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
902 d->map.nodes, d->map.sections);
903 MemoryRegionSection subsection = {
904 .offset_within_address_space = base,
905 .size = int128_make64(TARGET_PAGE_SIZE),
907 hwaddr start, end;
909 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
911 if (!(existing->mr->subpage)) {
912 subpage = subpage_init(d->as, base);
913 subsection.address_space = d->as;
914 subsection.mr = &subpage->iomem;
915 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
916 phys_section_add(&d->map, &subsection));
917 } else {
918 subpage = container_of(existing->mr, subpage_t, iomem);
920 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
921 end = start + int128_get64(section->size) - 1;
922 subpage_register(subpage, start, end,
923 phys_section_add(&d->map, section));
927 static void register_multipage(AddressSpaceDispatch *d,
928 MemoryRegionSection *section)
930 hwaddr start_addr = section->offset_within_address_space;
931 uint16_t section_index = phys_section_add(&d->map, section);
932 uint64_t num_pages = int128_get64(int128_rshift(section->size,
933 TARGET_PAGE_BITS));
935 assert(num_pages);
936 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
939 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
941 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
942 AddressSpaceDispatch *d = as->next_dispatch;
943 MemoryRegionSection now = *section, remain = *section;
944 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
946 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
947 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
948 - now.offset_within_address_space;
950 now.size = int128_min(int128_make64(left), now.size);
951 register_subpage(d, &now);
952 } else {
953 now.size = int128_zero();
955 while (int128_ne(remain.size, now.size)) {
956 remain.size = int128_sub(remain.size, now.size);
957 remain.offset_within_address_space += int128_get64(now.size);
958 remain.offset_within_region += int128_get64(now.size);
959 now = remain;
960 if (int128_lt(remain.size, page_size)) {
961 register_subpage(d, &now);
962 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
963 now.size = page_size;
964 register_subpage(d, &now);
965 } else {
966 now.size = int128_and(now.size, int128_neg(page_size));
967 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer when KVM is enabled; otherwise a no-op. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled()) {
        kvm_flush_coalesced_mmio_buffer();
    }
}
978 void qemu_mutex_lock_ramlist(void)
980 qemu_mutex_lock(&ram_list.mutex);
983 void qemu_mutex_unlock_ramlist(void)
985 qemu_mutex_unlock(&ram_list.mutex);
988 #ifdef __linux__
990 #include <sys/vfs.h>
#define HUGETLBFS_MAGIC       0x958458f6

/*
 * Return the block size of the filesystem that holds 'path', or 0 when
 * statfs() fails (the error is reported with perror()).  A warning is
 * printed when the filesystem type is not HUGETLBFS_MAGIC, but the block
 * size is returned regardless.
 */
static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    /* Retry when the call is interrupted by a signal. */
    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC) {
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
    }

    return fs.f_bsize;
}
1014 static sigjmp_buf sigjump;
1016 static void sigbus_handler(int signal)
1018 siglongjmp(sigjump, 1);
1021 static void *file_ram_alloc(RAMBlock *block,
1022 ram_addr_t memory,
1023 const char *path)
1025 char *filename;
1026 char *sanitized_name;
1027 char *c;
1028 void *area;
1029 int fd;
1030 unsigned long hpagesize;
1032 hpagesize = gethugepagesize(path);
1033 if (!hpagesize) {
1034 goto error;
1037 if (memory < hpagesize) {
1038 return NULL;
1041 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1042 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1043 goto error;
1046 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1047 sanitized_name = g_strdup(block->mr->name);
1048 for (c = sanitized_name; *c != '\0'; c++) {
1049 if (*c == '/')
1050 *c = '_';
1053 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1054 sanitized_name);
1055 g_free(sanitized_name);
1057 fd = mkstemp(filename);
1058 if (fd < 0) {
1059 perror("unable to create backing store for hugepages");
1060 g_free(filename);
1061 goto error;
1063 unlink(filename);
1064 g_free(filename);
1066 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1069 * ftruncate is not supported by hugetlbfs in older
1070 * hosts, so don't bother bailing out on errors.
1071 * If anything goes wrong with it under other filesystems,
1072 * mmap will fail.
1074 if (ftruncate(fd, memory))
1075 perror("ftruncate");
1077 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1078 if (area == MAP_FAILED) {
1079 perror("file_ram_alloc: can't mmap RAM pages");
1080 close(fd);
1081 goto error;
1084 if (mem_prealloc) {
1085 int ret, i;
1086 struct sigaction act, oldact;
1087 sigset_t set, oldset;
1089 memset(&act, 0, sizeof(act));
1090 act.sa_handler = &sigbus_handler;
1091 act.sa_flags = 0;
1093 ret = sigaction(SIGBUS, &act, &oldact);
1094 if (ret) {
1095 perror("file_ram_alloc: failed to install signal handler");
1096 exit(1);
1099 /* unblock SIGBUS */
1100 sigemptyset(&set);
1101 sigaddset(&set, SIGBUS);
1102 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1104 if (sigsetjmp(sigjump, 1)) {
1105 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1106 exit(1);
1109 /* MAP_POPULATE silently ignores failures */
1110 for (i = 0; i < (memory/hpagesize); i++) {
1111 memset(area + (hpagesize*i), 0, 1);
1114 ret = sigaction(SIGBUS, &oldact, NULL);
1115 if (ret) {
1116 perror("file_ram_alloc: failed to reinstall signal handler");
1117 exit(1);
1120 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1123 block->fd = fd;
1124 return area;
1126 error:
1127 if (mem_prealloc) {
1128 exit(1);
1130 return NULL;
1132 #else
1133 static void *file_ram_alloc(RAMBlock *block,
1134 ram_addr_t memory,
1135 const char *path)
1137 fprintf(stderr, "-mem-path not supported on this host\n");
1138 exit(1);
1140 #endif
1142 static ram_addr_t find_ram_offset(ram_addr_t size)
1144 RAMBlock *block, *next_block;
1145 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1147 assert(size != 0); /* it would hand out same offset multiple times */
1149 if (QTAILQ_EMPTY(&ram_list.blocks))
1150 return 0;
1152 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1153 ram_addr_t end, next = RAM_ADDR_MAX;
1155 end = block->offset + block->length;
1157 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1158 if (next_block->offset >= end) {
1159 next = MIN(next, next_block->offset);
1162 if (next - end >= size && next - end < mingap) {
1163 offset = end;
1164 mingap = next - end;
1168 if (offset == RAM_ADDR_MAX) {
1169 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1170 (uint64_t)size);
1171 abort();
1174 return offset;
1177 ram_addr_t last_ram_offset(void)
1179 RAMBlock *block;
1180 ram_addr_t last = 0;
1182 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1183 last = MAX(last, block->offset + block->length);
1185 return last;
1188 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1190 int ret;
1192 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1193 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1194 "dump-guest-core", true)) {
1195 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1196 if (ret) {
1197 perror("qemu_madvise");
1198 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1199 "but dump_guest_core=off specified\n");
1204 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1206 RAMBlock *new_block, *block;
1208 new_block = NULL;
1209 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1210 if (block->offset == addr) {
1211 new_block = block;
1212 break;
1215 assert(new_block);
1216 assert(!new_block->idstr[0]);
1218 if (dev) {
1219 char *id = qdev_get_dev_path(dev);
1220 if (id) {
1221 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1222 g_free(id);
1225 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1227 /* This assumes the iothread lock is taken here too. */
1228 qemu_mutex_lock_ramlist();
1229 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1230 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1231 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1232 new_block->idstr);
1233 abort();
1236 qemu_mutex_unlock_ramlist();
1239 static int memory_try_enable_merging(void *addr, size_t len)
1241 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1242 /* disabled by the user */
1243 return 0;
1246 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1249 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1250 MemoryRegion *mr)
1252 RAMBlock *block, *new_block;
1253 ram_addr_t old_ram_size, new_ram_size;
1255 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1257 size = TARGET_PAGE_ALIGN(size);
1258 new_block = g_malloc0(sizeof(*new_block));
1259 new_block->fd = -1;
1261 /* This assumes the iothread lock is taken here too. */
1262 qemu_mutex_lock_ramlist();
1263 new_block->mr = mr;
1264 new_block->offset = find_ram_offset(size);
1265 if (host) {
1266 new_block->host = host;
1267 new_block->flags |= RAM_PREALLOC_MASK;
1268 } else if (xen_enabled()) {
1269 if (mem_path) {
1270 fprintf(stderr, "-mem-path not supported with Xen\n");
1271 exit(1);
1273 xen_ram_alloc(new_block->offset, size, mr);
1274 } else {
1275 if (mem_path) {
1276 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1278 * file_ram_alloc() needs to allocate just like
1279 * phys_mem_alloc, but we haven't bothered to provide
1280 * a hook there.
1282 fprintf(stderr,
1283 "-mem-path not supported with this accelerator\n");
1284 exit(1);
1286 new_block->host = file_ram_alloc(new_block, size, mem_path);
1288 if (!new_block->host) {
1289 new_block->host = phys_mem_alloc(size);
1290 if (!new_block->host) {
1291 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1292 new_block->mr->name, strerror(errno));
1293 exit(1);
1295 memory_try_enable_merging(new_block->host, size);
1298 new_block->length = size;
1300 /* Keep the list sorted from biggest to smallest block. */
1301 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1302 if (block->length < new_block->length) {
1303 break;
1306 if (block) {
1307 QTAILQ_INSERT_BEFORE(block, new_block, next);
1308 } else {
1309 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1311 ram_list.mru_block = NULL;
1313 ram_list.version++;
1314 qemu_mutex_unlock_ramlist();
1316 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1318 if (new_ram_size > old_ram_size) {
1319 int i;
1320 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1321 ram_list.dirty_memory[i] =
1322 bitmap_zero_extend(ram_list.dirty_memory[i],
1323 old_ram_size, new_ram_size);
1326 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1328 qemu_ram_setup_dump(new_block->host, size);
1329 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1330 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1332 if (kvm_enabled())
1333 kvm_setup_guest_memory(new_block->host, size);
1335 return new_block->offset;
1338 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1340 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1343 void qemu_ram_free_from_ptr(ram_addr_t addr)
1345 RAMBlock *block;
1347 /* This assumes the iothread lock is taken here too. */
1348 qemu_mutex_lock_ramlist();
1349 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1350 if (addr == block->offset) {
1351 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1352 ram_list.mru_block = NULL;
1353 ram_list.version++;
1354 g_free(block);
1355 break;
1358 qemu_mutex_unlock_ramlist();
1361 void qemu_ram_free(ram_addr_t addr)
1363 RAMBlock *block;
1365 /* This assumes the iothread lock is taken here too. */
1366 qemu_mutex_lock_ramlist();
1367 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1368 if (addr == block->offset) {
1369 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1370 ram_list.mru_block = NULL;
1371 ram_list.version++;
1372 if (block->flags & RAM_PREALLOC_MASK) {
1374 } else if (xen_enabled()) {
1375 xen_invalidate_map_cache_entry(block->host);
1376 #ifndef _WIN32
1377 } else if (block->fd >= 0) {
1378 munmap(block->host, block->length);
1379 close(block->fd);
1380 #endif
1381 } else {
1382 qemu_anon_ram_free(block->host, block->length);
1384 g_free(block);
1385 break;
1388 qemu_mutex_unlock_ramlist();
1392 #ifndef _WIN32
1393 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1395 RAMBlock *block;
1396 ram_addr_t offset;
1397 int flags;
1398 void *area, *vaddr;
1400 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1401 offset = addr - block->offset;
1402 if (offset < block->length) {
1403 vaddr = block->host + offset;
1404 if (block->flags & RAM_PREALLOC_MASK) {
1406 } else if (xen_enabled()) {
1407 abort();
1408 } else {
1409 flags = MAP_FIXED;
1410 munmap(vaddr, length);
1411 if (block->fd >= 0) {
1412 #ifdef MAP_POPULATE
1413 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1414 MAP_PRIVATE;
1415 #else
1416 flags |= MAP_PRIVATE;
1417 #endif
1418 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1419 flags, block->fd, offset);
1420 } else {
1422 * Remap needs to match alloc. Accelerators that
1423 * set phys_mem_alloc never remap. If they did,
1424 * we'd need a remap hook here.
1426 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1428 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1429 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1430 flags, -1, 0);
1432 if (area != vaddr) {
1433 fprintf(stderr, "Could not remap addr: "
1434 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1435 length, addr);
1436 exit(1);
1438 memory_try_enable_merging(vaddr, length);
1439 qemu_ram_setup_dump(vaddr, length);
1441 return;
1445 #endif /* !_WIN32 */
1447 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1448 With the exception of the softmmu code in this file, this should
1449 only be used for local memory (e.g. video ram) that the device owns,
1450 and knows it isn't going to access beyond the end of the block.
1452 It should not be used for general purpose DMA.
1453 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1455 void *qemu_get_ram_ptr(ram_addr_t addr)
1457 RAMBlock *block = qemu_get_ram_block(addr);
1459 if (xen_enabled()) {
1460 /* We need to check if the requested address is in the RAM
1461 * because we don't want to map the entire memory in QEMU.
1462 * In that case just map until the end of the page.
1464 if (block->offset == 0) {
1465 return xen_map_cache(addr, 0, 0);
1466 } else if (block->host == NULL) {
1467 block->host =
1468 xen_map_cache(block->offset, block->length, 1);
1471 return block->host + (addr - block->offset);
1474 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1475 * but takes a size argument */
1476 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1478 if (*size == 0) {
1479 return NULL;
1481 if (xen_enabled()) {
1482 return xen_map_cache(addr, *size, 1);
1483 } else {
1484 RAMBlock *block;
1486 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1487 if (addr - block->offset < block->length) {
1488 if (addr - block->offset + *size > block->length)
1489 *size = block->length - addr + block->offset;
1490 return block->host + (addr - block->offset);
1494 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1495 abort();
1499 /* Some of the softmmu routines need to translate from a host pointer
1500 (typically a TLB entry) back to a ram offset. */
1501 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1503 RAMBlock *block;
1504 uint8_t *host = ptr;
1506 if (xen_enabled()) {
1507 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1508 return qemu_get_ram_block(*ram_addr)->mr;
1511 block = ram_list.mru_block;
1512 if (block && block->host && host - block->host < block->length) {
1513 goto found;
1516 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1517 /* This case append when the block is not mapped. */
1518 if (block->host == NULL) {
1519 continue;
1521 if (host - block->host < block->length) {
1522 goto found;
1526 return NULL;
1528 found:
1529 *ram_addr = block->offset + (host - block->host);
1530 return block->mr;
1533 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1534 uint64_t val, unsigned size)
1536 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1537 tb_invalidate_phys_page_fast(ram_addr, size);
1539 switch (size) {
1540 case 1:
1541 stb_p(qemu_get_ram_ptr(ram_addr), val);
1542 break;
1543 case 2:
1544 stw_p(qemu_get_ram_ptr(ram_addr), val);
1545 break;
1546 case 4:
1547 stl_p(qemu_get_ram_ptr(ram_addr), val);
1548 break;
1549 default:
1550 abort();
1552 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1553 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1554 /* we remove the notdirty callback only if the code has been
1555 flushed */
1556 if (!cpu_physical_memory_is_clean(ram_addr)) {
1557 CPUArchState *env = current_cpu->env_ptr;
1558 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1562 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1563 unsigned size, bool is_write)
1565 return is_write;
1568 static const MemoryRegionOps notdirty_mem_ops = {
1569 .write = notdirty_mem_write,
1570 .valid.accepts = notdirty_mem_accepts,
1571 .endianness = DEVICE_NATIVE_ENDIAN,
1574 /* Generate a debug exception if a watchpoint has been hit. */
1575 static void check_watchpoint(int offset, int len_mask, int flags)
1577 CPUState *cpu = current_cpu;
1578 CPUArchState *env = cpu->env_ptr;
1579 target_ulong pc, cs_base;
1580 target_ulong vaddr;
1581 CPUWatchpoint *wp;
1582 int cpu_flags;
1584 if (cpu->watchpoint_hit) {
1585 /* We re-entered the check after replacing the TB. Now raise
1586 * the debug interrupt so that is will trigger after the
1587 * current instruction. */
1588 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1589 return;
1591 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1592 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1593 if ((vaddr == (wp->vaddr & len_mask) ||
1594 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1595 wp->flags |= BP_WATCHPOINT_HIT;
1596 if (!cpu->watchpoint_hit) {
1597 cpu->watchpoint_hit = wp;
1598 tb_check_watchpoint(cpu);
1599 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1600 cpu->exception_index = EXCP_DEBUG;
1601 cpu_loop_exit(cpu);
1602 } else {
1603 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1604 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1605 cpu_resume_from_signal(cpu, NULL);
1608 } else {
1609 wp->flags &= ~BP_WATCHPOINT_HIT;
1614 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1615 so these check for a hit then pass through to the normal out-of-line
1616 phys routines. */
1617 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1618 unsigned size)
1620 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1621 switch (size) {
1622 case 1: return ldub_phys(&address_space_memory, addr);
1623 case 2: return lduw_phys(&address_space_memory, addr);
1624 case 4: return ldl_phys(&address_space_memory, addr);
1625 default: abort();
1629 static void watch_mem_write(void *opaque, hwaddr addr,
1630 uint64_t val, unsigned size)
1632 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1633 switch (size) {
1634 case 1:
1635 stb_phys(&address_space_memory, addr, val);
1636 break;
1637 case 2:
1638 stw_phys(&address_space_memory, addr, val);
1639 break;
1640 case 4:
1641 stl_phys(&address_space_memory, addr, val);
1642 break;
1643 default: abort();
1647 static const MemoryRegionOps watch_mem_ops = {
1648 .read = watch_mem_read,
1649 .write = watch_mem_write,
1650 .endianness = DEVICE_NATIVE_ENDIAN,
1653 static uint64_t subpage_read(void *opaque, hwaddr addr,
1654 unsigned len)
1656 subpage_t *subpage = opaque;
1657 uint8_t buf[4];
1659 #if defined(DEBUG_SUBPAGE)
1660 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1661 subpage, len, addr);
1662 #endif
1663 address_space_read(subpage->as, addr + subpage->base, buf, len);
1664 switch (len) {
1665 case 1:
1666 return ldub_p(buf);
1667 case 2:
1668 return lduw_p(buf);
1669 case 4:
1670 return ldl_p(buf);
1671 default:
1672 abort();
1676 static void subpage_write(void *opaque, hwaddr addr,
1677 uint64_t value, unsigned len)
1679 subpage_t *subpage = opaque;
1680 uint8_t buf[4];
1682 #if defined(DEBUG_SUBPAGE)
1683 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1684 " value %"PRIx64"\n",
1685 __func__, subpage, len, addr, value);
1686 #endif
1687 switch (len) {
1688 case 1:
1689 stb_p(buf, value);
1690 break;
1691 case 2:
1692 stw_p(buf, value);
1693 break;
1694 case 4:
1695 stl_p(buf, value);
1696 break;
1697 default:
1698 abort();
1700 address_space_write(subpage->as, addr + subpage->base, buf, len);
1703 static bool subpage_accepts(void *opaque, hwaddr addr,
1704 unsigned len, bool is_write)
1706 subpage_t *subpage = opaque;
1707 #if defined(DEBUG_SUBPAGE)
1708 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1709 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1710 #endif
1712 return address_space_access_valid(subpage->as, addr + subpage->base,
1713 len, is_write);
1716 static const MemoryRegionOps subpage_ops = {
1717 .read = subpage_read,
1718 .write = subpage_write,
1719 .valid.accepts = subpage_accepts,
1720 .endianness = DEVICE_NATIVE_ENDIAN,
1723 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1724 uint16_t section)
1726 int idx, eidx;
1728 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1729 return -1;
1730 idx = SUBPAGE_IDX(start);
1731 eidx = SUBPAGE_IDX(end);
1732 #if defined(DEBUG_SUBPAGE)
1733 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1734 __func__, mmio, start, end, idx, eidx, section);
1735 #endif
1736 for (; idx <= eidx; idx++) {
1737 mmio->sub_section[idx] = section;
1740 return 0;
1743 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1745 subpage_t *mmio;
1747 mmio = g_malloc0(sizeof(subpage_t));
1749 mmio->as = as;
1750 mmio->base = base;
1751 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1752 "subpage", TARGET_PAGE_SIZE);
1753 mmio->iomem.subpage = true;
1754 #if defined(DEBUG_SUBPAGE)
1755 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1756 mmio, base, TARGET_PAGE_SIZE);
1757 #endif
1758 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1760 return mmio;
1763 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1765 MemoryRegionSection section = {
1766 .address_space = &address_space_memory,
1767 .mr = mr,
1768 .offset_within_address_space = 0,
1769 .offset_within_region = 0,
1770 .size = int128_2_64(),
1773 return phys_section_add(map, &section);
1776 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1778 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1781 static void io_mem_init(void)
1783 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1784 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1785 "unassigned", UINT64_MAX);
1786 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1787 "notdirty", UINT64_MAX);
1788 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1789 "watch", UINT64_MAX);
1792 static void mem_begin(MemoryListener *listener)
1794 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1795 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1796 uint16_t n;
1798 n = dummy_section(&d->map, &io_mem_unassigned);
1799 assert(n == PHYS_SECTION_UNASSIGNED);
1800 n = dummy_section(&d->map, &io_mem_notdirty);
1801 assert(n == PHYS_SECTION_NOTDIRTY);
1802 n = dummy_section(&d->map, &io_mem_rom);
1803 assert(n == PHYS_SECTION_ROM);
1804 n = dummy_section(&d->map, &io_mem_watch);
1805 assert(n == PHYS_SECTION_WATCH);
1807 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1808 d->as = as;
1809 as->next_dispatch = d;
1812 static void mem_commit(MemoryListener *listener)
1814 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1815 AddressSpaceDispatch *cur = as->dispatch;
1816 AddressSpaceDispatch *next = as->next_dispatch;
1818 phys_page_compact_all(next, next->map.nodes_nb);
1820 as->dispatch = next;
1822 if (cur) {
1823 phys_sections_free(&cur->map);
1824 g_free(cur);
1828 static void tcg_commit(MemoryListener *listener)
1830 CPUState *cpu;
1832 /* since each CPU stores ram addresses in its TLB cache, we must
1833 reset the modified entries */
1834 /* XXX: slow ! */
1835 CPU_FOREACH(cpu) {
1836 /* FIXME: Disentangle the cpu.h circular files deps so we can
1837 directly get the right CPU from listener. */
1838 if (cpu->tcg_as_listener != listener) {
1839 continue;
1841 tlb_flush(cpu, 1);
1845 static void core_log_global_start(MemoryListener *listener)
1847 cpu_physical_memory_set_dirty_tracking(true);
1850 static void core_log_global_stop(MemoryListener *listener)
1852 cpu_physical_memory_set_dirty_tracking(false);
1855 static MemoryListener core_memory_listener = {
1856 .log_global_start = core_log_global_start,
1857 .log_global_stop = core_log_global_stop,
1858 .priority = 1,
1861 void address_space_init_dispatch(AddressSpace *as)
1863 as->dispatch = NULL;
1864 as->dispatch_listener = (MemoryListener) {
1865 .begin = mem_begin,
1866 .commit = mem_commit,
1867 .region_add = mem_add,
1868 .region_nop = mem_add,
1869 .priority = 0,
1871 memory_listener_register(&as->dispatch_listener, as);
1874 void address_space_destroy_dispatch(AddressSpace *as)
1876 AddressSpaceDispatch *d = as->dispatch;
1878 memory_listener_unregister(&as->dispatch_listener);
1879 g_free(d);
1880 as->dispatch = NULL;
1883 static void memory_map_init(void)
1885 system_memory = g_malloc(sizeof(*system_memory));
1887 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1888 address_space_init(&address_space_memory, system_memory, "memory");
1890 system_io = g_malloc(sizeof(*system_io));
1891 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1892 65536);
1893 address_space_init(&address_space_io, system_io, "I/O");
1895 memory_listener_register(&core_memory_listener, &address_space_memory);
1898 MemoryRegion *get_system_memory(void)
1900 return system_memory;
1903 MemoryRegion *get_system_io(void)
1905 return system_io;
1908 #endif /* !defined(CONFIG_USER_ONLY) */
1910 /* physical memory access (slow version, mainly for debug) */
1911 #if defined(CONFIG_USER_ONLY)
1912 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1913 uint8_t *buf, int len, int is_write)
1915 int l, flags;
1916 target_ulong page;
1917 void * p;
1919 while (len > 0) {
1920 page = addr & TARGET_PAGE_MASK;
1921 l = (page + TARGET_PAGE_SIZE) - addr;
1922 if (l > len)
1923 l = len;
1924 flags = page_get_flags(page);
1925 if (!(flags & PAGE_VALID))
1926 return -1;
1927 if (is_write) {
1928 if (!(flags & PAGE_WRITE))
1929 return -1;
1930 /* XXX: this code should not depend on lock_user */
1931 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1932 return -1;
1933 memcpy(p, buf, l);
1934 unlock_user(p, addr, l);
1935 } else {
1936 if (!(flags & PAGE_READ))
1937 return -1;
1938 /* XXX: this code should not depend on lock_user */
1939 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1940 return -1;
1941 memcpy(buf, p, l);
1942 unlock_user(p, addr, 0);
1944 len -= l;
1945 buf += l;
1946 addr += l;
1948 return 0;
1951 #else
1953 static void invalidate_and_set_dirty(hwaddr addr,
1954 hwaddr length)
1956 if (cpu_physical_memory_is_clean(addr)) {
1957 /* invalidate code */
1958 tb_invalidate_phys_page_range(addr, addr + length, 0);
1959 /* set dirty bit */
1960 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1961 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1963 xen_modified_memory(addr, length);
1966 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1968 unsigned access_size_max = mr->ops->valid.max_access_size;
1970 /* Regions are assumed to support 1-4 byte accesses unless
1971 otherwise specified. */
1972 if (access_size_max == 0) {
1973 access_size_max = 4;
1976 /* Bound the maximum access by the alignment of the address. */
1977 if (!mr->ops->impl.unaligned) {
1978 unsigned align_size_max = addr & -addr;
1979 if (align_size_max != 0 && align_size_max < access_size_max) {
1980 access_size_max = align_size_max;
1984 /* Don't attempt accesses larger than the maximum. */
1985 if (l > access_size_max) {
1986 l = access_size_max;
1988 if (l & (l - 1)) {
1989 l = 1 << (qemu_fls(l) - 1);
1992 return l;
1995 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1996 int len, bool is_write)
1998 hwaddr l;
1999 uint8_t *ptr;
2000 uint64_t val;
2001 hwaddr addr1;
2002 MemoryRegion *mr;
2003 bool error = false;
2005 while (len > 0) {
2006 l = len;
2007 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2009 if (is_write) {
2010 if (!memory_access_is_direct(mr, is_write)) {
2011 l = memory_access_size(mr, l, addr1);
2012 /* XXX: could force current_cpu to NULL to avoid
2013 potential bugs */
2014 switch (l) {
2015 case 8:
2016 /* 64 bit write access */
2017 val = ldq_p(buf);
2018 error |= io_mem_write(mr, addr1, val, 8);
2019 break;
2020 case 4:
2021 /* 32 bit write access */
2022 val = ldl_p(buf);
2023 error |= io_mem_write(mr, addr1, val, 4);
2024 break;
2025 case 2:
2026 /* 16 bit write access */
2027 val = lduw_p(buf);
2028 error |= io_mem_write(mr, addr1, val, 2);
2029 break;
2030 case 1:
2031 /* 8 bit write access */
2032 val = ldub_p(buf);
2033 error |= io_mem_write(mr, addr1, val, 1);
2034 break;
2035 default:
2036 abort();
2038 } else {
2039 addr1 += memory_region_get_ram_addr(mr);
2040 /* RAM case */
2041 ptr = qemu_get_ram_ptr(addr1);
2042 memcpy(ptr, buf, l);
2043 invalidate_and_set_dirty(addr1, l);
2045 } else {
2046 if (!memory_access_is_direct(mr, is_write)) {
2047 /* I/O case */
2048 l = memory_access_size(mr, l, addr1);
2049 switch (l) {
2050 case 8:
2051 /* 64 bit read access */
2052 error |= io_mem_read(mr, addr1, &val, 8);
2053 stq_p(buf, val);
2054 break;
2055 case 4:
2056 /* 32 bit read access */
2057 error |= io_mem_read(mr, addr1, &val, 4);
2058 stl_p(buf, val);
2059 break;
2060 case 2:
2061 /* 16 bit read access */
2062 error |= io_mem_read(mr, addr1, &val, 2);
2063 stw_p(buf, val);
2064 break;
2065 case 1:
2066 /* 8 bit read access */
2067 error |= io_mem_read(mr, addr1, &val, 1);
2068 stb_p(buf, val);
2069 break;
2070 default:
2071 abort();
2073 } else {
2074 /* RAM case */
2075 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2076 memcpy(buf, ptr, l);
2079 len -= l;
2080 buf += l;
2081 addr += l;
2084 return error;
2087 bool address_space_write(AddressSpace *as, hwaddr addr,
2088 const uint8_t *buf, int len)
2090 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2093 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2095 return address_space_rw(as, addr, buf, len, false);
2099 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2100 int len, int is_write)
2102 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host icache for the range */
};
2110 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2111 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2113 hwaddr l;
2114 uint8_t *ptr;
2115 hwaddr addr1;
2116 MemoryRegion *mr;
2118 while (len > 0) {
2119 l = len;
2120 mr = address_space_translate(as, addr, &addr1, &l, true);
2122 if (!(memory_region_is_ram(mr) ||
2123 memory_region_is_romd(mr))) {
2124 /* do nothing */
2125 } else {
2126 addr1 += memory_region_get_ram_addr(mr);
2127 /* ROM/RAM case */
2128 ptr = qemu_get_ram_ptr(addr1);
2129 switch (type) {
2130 case WRITE_DATA:
2131 memcpy(ptr, buf, l);
2132 invalidate_and_set_dirty(addr1, l);
2133 break;
2134 case FLUSH_CACHE:
2135 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2136 break;
2139 len -= l;
2140 buf += l;
2141 addr += l;
2145 /* used for ROM loading : can write in RAM and ROM */
2146 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2147 const uint8_t *buf, int len)
2149 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2152 void cpu_flush_icache_range(hwaddr start, int len)
2155 * This function should do the same thing as an icache flush that was
2156 * triggered from within the guest. For TCG we are always cache coherent,
2157 * so there is no need to flush anything. For KVM / Xen we need to flush
2158 * the host's instruction cache at least.
2160 if (tcg_enabled()) {
2161 return;
2164 cpu_physical_memory_write_rom_internal(&address_space_memory,
2165 start, NULL, len, FLUSH_CACHE);
2168 typedef struct {
2169 MemoryRegion *mr;
2170 void *buffer;
2171 hwaddr addr;
2172 hwaddr len;
2173 } BounceBuffer;
2175 static BounceBuffer bounce;
2177 typedef struct MapClient {
2178 void *opaque;
2179 void (*callback)(void *opaque);
2180 QLIST_ENTRY(MapClient) link;
2181 } MapClient;
2183 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2184 = QLIST_HEAD_INITIALIZER(map_client_list);
2186 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2188 MapClient *client = g_malloc(sizeof(*client));
2190 client->opaque = opaque;
2191 client->callback = callback;
2192 QLIST_INSERT_HEAD(&map_client_list, client, link);
2193 return client;
2196 static void cpu_unregister_map_client(void *_client)
2198 MapClient *client = (MapClient *)_client;
2200 QLIST_REMOVE(client, link);
2201 g_free(client);
2204 static void cpu_notify_map_clients(void)
2206 MapClient *client;
2208 while (!QLIST_EMPTY(&map_client_list)) {
2209 client = QLIST_FIRST(&map_client_list);
2210 client->callback(client->opaque);
2211 cpu_unregister_map_client(client);
2215 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2217 MemoryRegion *mr;
2218 hwaddr l, xlat;
2220 while (len > 0) {
2221 l = len;
2222 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2223 if (!memory_access_is_direct(mr, is_write)) {
2224 l = memory_access_size(mr, l, addr);
2225 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2226 return false;
2230 len -= l;
2231 addr += l;
2233 return true;
2236 /* Map a physical memory region into a host virtual address.
2237 * May map a subset of the requested range, given by and returned in *plen.
2238 * May return NULL if resources needed to perform the mapping are exhausted.
2239 * Use only for reads OR writes - not for read-modify-write operations.
2240 * Use cpu_register_map_client() to know when retrying the map operation is
2241 * likely to succeed.
2243 void *address_space_map(AddressSpace *as,
2244 hwaddr addr,
2245 hwaddr *plen,
2246 bool is_write)
2248 hwaddr len = *plen;
2249 hwaddr done = 0;
2250 hwaddr l, xlat, base;
2251 MemoryRegion *mr, *this_mr;
2252 ram_addr_t raddr;
2254 if (len == 0) {
2255 return NULL;
2258 l = len;
2259 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2260 if (!memory_access_is_direct(mr, is_write)) {
2261 if (bounce.buffer) {
2262 return NULL;
2264 /* Avoid unbounded allocations */
2265 l = MIN(l, TARGET_PAGE_SIZE);
2266 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2267 bounce.addr = addr;
2268 bounce.len = l;
2270 memory_region_ref(mr);
2271 bounce.mr = mr;
2272 if (!is_write) {
2273 address_space_read(as, addr, bounce.buffer, l);
2276 *plen = l;
2277 return bounce.buffer;
2280 base = xlat;
2281 raddr = memory_region_get_ram_addr(mr);
2283 for (;;) {
2284 len -= l;
2285 addr += l;
2286 done += l;
2287 if (len == 0) {
2288 break;
2291 l = len;
2292 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2293 if (this_mr != mr || xlat != base + done) {
2294 break;
2298 memory_region_ref(mr);
2299 *plen = done;
2300 return qemu_ram_ptr_length(raddr + base, plen);
2303 /* Unmaps a memory region previously mapped by address_space_map().
2304 * Will also mark the memory as dirty if is_write == 1. access_len gives
2305 * the amount of memory that was actually read or written by the caller.
2307 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2308 int is_write, hwaddr access_len)
2310 if (buffer != bounce.buffer) {
2311 MemoryRegion *mr;
2312 ram_addr_t addr1;
2314 mr = qemu_ram_addr_from_host(buffer, &addr1);
2315 assert(mr != NULL);
2316 if (is_write) {
2317 while (access_len) {
2318 unsigned l;
2319 l = TARGET_PAGE_SIZE;
2320 if (l > access_len)
2321 l = access_len;
2322 invalidate_and_set_dirty(addr1, l);
2323 addr1 += l;
2324 access_len -= l;
2327 if (xen_enabled()) {
2328 xen_invalidate_map_cache_entry(buffer);
2330 memory_region_unref(mr);
2331 return;
2333 if (is_write) {
2334 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2336 qemu_vfree(bounce.buffer);
2337 bounce.buffer = NULL;
2338 memory_region_unref(bounce.mr);
2339 cpu_notify_map_clients();
2342 void *cpu_physical_memory_map(hwaddr addr,
2343 hwaddr *plen,
2344 int is_write)
2346 return address_space_map(&address_space_memory, addr, plen, is_write);
2349 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2350 int is_write, hwaddr access_len)
2352 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2355 /* warning: addr must be aligned */
2356 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2357 enum device_endian endian)
2359 uint8_t *ptr;
2360 uint64_t val;
2361 MemoryRegion *mr;
2362 hwaddr l = 4;
2363 hwaddr addr1;
2365 mr = address_space_translate(as, addr, &addr1, &l, false);
2366 if (l < 4 || !memory_access_is_direct(mr, false)) {
2367 /* I/O case */
2368 io_mem_read(mr, addr1, &val, 4);
2369 #if defined(TARGET_WORDS_BIGENDIAN)
2370 if (endian == DEVICE_LITTLE_ENDIAN) {
2371 val = bswap32(val);
2373 #else
2374 if (endian == DEVICE_BIG_ENDIAN) {
2375 val = bswap32(val);
2377 #endif
2378 } else {
2379 /* RAM case */
2380 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2381 & TARGET_PAGE_MASK)
2382 + addr1);
2383 switch (endian) {
2384 case DEVICE_LITTLE_ENDIAN:
2385 val = ldl_le_p(ptr);
2386 break;
2387 case DEVICE_BIG_ENDIAN:
2388 val = ldl_be_p(ptr);
2389 break;
2390 default:
2391 val = ldl_p(ptr);
2392 break;
2395 return val;
2398 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2400 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2403 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2405 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2408 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2410 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2413 /* warning: addr must be aligned */
2414 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2415 enum device_endian endian)
2417 uint8_t *ptr;
2418 uint64_t val;
2419 MemoryRegion *mr;
2420 hwaddr l = 8;
2421 hwaddr addr1;
2423 mr = address_space_translate(as, addr, &addr1, &l,
2424 false);
2425 if (l < 8 || !memory_access_is_direct(mr, false)) {
2426 /* I/O case */
2427 io_mem_read(mr, addr1, &val, 8);
2428 #if defined(TARGET_WORDS_BIGENDIAN)
2429 if (endian == DEVICE_LITTLE_ENDIAN) {
2430 val = bswap64(val);
2432 #else
2433 if (endian == DEVICE_BIG_ENDIAN) {
2434 val = bswap64(val);
2436 #endif
2437 } else {
2438 /* RAM case */
2439 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2440 & TARGET_PAGE_MASK)
2441 + addr1);
2442 switch (endian) {
2443 case DEVICE_LITTLE_ENDIAN:
2444 val = ldq_le_p(ptr);
2445 break;
2446 case DEVICE_BIG_ENDIAN:
2447 val = ldq_be_p(ptr);
2448 break;
2449 default:
2450 val = ldq_p(ptr);
2451 break;
2454 return val;
2457 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2459 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2462 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2464 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2467 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2469 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2472 /* XXX: optimize */
2473 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2475 uint8_t val;
2476 address_space_rw(as, addr, &val, 1, 0);
2477 return val;
2480 /* warning: addr must be aligned */
2481 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2482 enum device_endian endian)
2484 uint8_t *ptr;
2485 uint64_t val;
2486 MemoryRegion *mr;
2487 hwaddr l = 2;
2488 hwaddr addr1;
2490 mr = address_space_translate(as, addr, &addr1, &l,
2491 false);
2492 if (l < 2 || !memory_access_is_direct(mr, false)) {
2493 /* I/O case */
2494 io_mem_read(mr, addr1, &val, 2);
2495 #if defined(TARGET_WORDS_BIGENDIAN)
2496 if (endian == DEVICE_LITTLE_ENDIAN) {
2497 val = bswap16(val);
2499 #else
2500 if (endian == DEVICE_BIG_ENDIAN) {
2501 val = bswap16(val);
2503 #endif
2504 } else {
2505 /* RAM case */
2506 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2507 & TARGET_PAGE_MASK)
2508 + addr1);
2509 switch (endian) {
2510 case DEVICE_LITTLE_ENDIAN:
2511 val = lduw_le_p(ptr);
2512 break;
2513 case DEVICE_BIG_ENDIAN:
2514 val = lduw_be_p(ptr);
2515 break;
2516 default:
2517 val = lduw_p(ptr);
2518 break;
2521 return val;
2524 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2526 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2529 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2531 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2534 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2536 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2539 /* warning: addr must be aligned. The ram page is not masked as dirty
2540 and the code inside is not invalidated. It is useful if the dirty
2541 bits are used to track modified PTEs */
2542 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2544 uint8_t *ptr;
2545 MemoryRegion *mr;
2546 hwaddr l = 4;
2547 hwaddr addr1;
2549 mr = address_space_translate(as, addr, &addr1, &l,
2550 true);
2551 if (l < 4 || !memory_access_is_direct(mr, true)) {
2552 io_mem_write(mr, addr1, val, 4);
2553 } else {
2554 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2555 ptr = qemu_get_ram_ptr(addr1);
2556 stl_p(ptr, val);
2558 if (unlikely(in_migration)) {
2559 if (cpu_physical_memory_is_clean(addr1)) {
2560 /* invalidate code */
2561 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2562 /* set dirty bit */
2563 cpu_physical_memory_set_dirty_flag(addr1,
2564 DIRTY_MEMORY_MIGRATION);
2565 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2571 /* warning: addr must be aligned */
2572 static inline void stl_phys_internal(AddressSpace *as,
2573 hwaddr addr, uint32_t val,
2574 enum device_endian endian)
2576 uint8_t *ptr;
2577 MemoryRegion *mr;
2578 hwaddr l = 4;
2579 hwaddr addr1;
2581 mr = address_space_translate(as, addr, &addr1, &l,
2582 true);
2583 if (l < 4 || !memory_access_is_direct(mr, true)) {
2584 #if defined(TARGET_WORDS_BIGENDIAN)
2585 if (endian == DEVICE_LITTLE_ENDIAN) {
2586 val = bswap32(val);
2588 #else
2589 if (endian == DEVICE_BIG_ENDIAN) {
2590 val = bswap32(val);
2592 #endif
2593 io_mem_write(mr, addr1, val, 4);
2594 } else {
2595 /* RAM case */
2596 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2597 ptr = qemu_get_ram_ptr(addr1);
2598 switch (endian) {
2599 case DEVICE_LITTLE_ENDIAN:
2600 stl_le_p(ptr, val);
2601 break;
2602 case DEVICE_BIG_ENDIAN:
2603 stl_be_p(ptr, val);
2604 break;
2605 default:
2606 stl_p(ptr, val);
2607 break;
2609 invalidate_and_set_dirty(addr1, 4);
2613 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2615 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2618 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2620 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2623 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2625 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2628 /* XXX: optimize */
2629 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2631 uint8_t v = val;
2632 address_space_rw(as, addr, &v, 1, 1);
2635 /* warning: addr must be aligned */
2636 static inline void stw_phys_internal(AddressSpace *as,
2637 hwaddr addr, uint32_t val,
2638 enum device_endian endian)
2640 uint8_t *ptr;
2641 MemoryRegion *mr;
2642 hwaddr l = 2;
2643 hwaddr addr1;
2645 mr = address_space_translate(as, addr, &addr1, &l, true);
2646 if (l < 2 || !memory_access_is_direct(mr, true)) {
2647 #if defined(TARGET_WORDS_BIGENDIAN)
2648 if (endian == DEVICE_LITTLE_ENDIAN) {
2649 val = bswap16(val);
2651 #else
2652 if (endian == DEVICE_BIG_ENDIAN) {
2653 val = bswap16(val);
2655 #endif
2656 io_mem_write(mr, addr1, val, 2);
2657 } else {
2658 /* RAM case */
2659 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2660 ptr = qemu_get_ram_ptr(addr1);
2661 switch (endian) {
2662 case DEVICE_LITTLE_ENDIAN:
2663 stw_le_p(ptr, val);
2664 break;
2665 case DEVICE_BIG_ENDIAN:
2666 stw_be_p(ptr, val);
2667 break;
2668 default:
2669 stw_p(ptr, val);
2670 break;
2672 invalidate_and_set_dirty(addr1, 2);
2676 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2678 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2681 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2683 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2686 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2688 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2691 /* XXX: optimize */
2692 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2694 val = tswap64(val);
2695 address_space_rw(as, addr, (void *) &val, 8, 1);
2698 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2700 val = cpu_to_le64(val);
2701 address_space_rw(as, addr, (void *) &val, 8, 1);
2704 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2706 val = cpu_to_be64(val);
2707 address_space_rw(as, addr, (void *) &val, 8, 1);
2710 /* virtual memory access for debug (includes writing to ROM) */
2711 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2712 uint8_t *buf, int len, int is_write)
2714 int l;
2715 hwaddr phys_addr;
2716 target_ulong page;
2718 while (len > 0) {
2719 page = addr & TARGET_PAGE_MASK;
2720 phys_addr = cpu_get_phys_page_debug(cpu, page);
2721 /* if no physical page mapped, return an error */
2722 if (phys_addr == -1)
2723 return -1;
2724 l = (page + TARGET_PAGE_SIZE) - addr;
2725 if (l > len)
2726 l = len;
2727 phys_addr += (addr & ~TARGET_PAGE_MASK);
2728 if (is_write) {
2729 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2730 } else {
2731 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2733 len -= l;
2734 buf += l;
2735 addr += l;
2737 return 0;
2739 #endif
2741 #if !defined(CONFIG_USER_ONLY)
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * The answer is fixed at compile time: true when TARGET_WORDS_BIGENDIAN
 * is defined, false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
2757 #endif
2759 #ifndef CONFIG_USER_ONLY
2760 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2762 MemoryRegion*mr;
2763 hwaddr l = 1;
2765 mr = address_space_translate(&address_space_memory,
2766 phys_addr, &phys_addr, &l, false);
2768 return !(memory_region_is_ram(mr) ||
2769 memory_region_is_romd(mr));
2772 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2774 RAMBlock *block;
2776 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2777 func(block->host, block->offset, block->length, opaque);
2780 #endif