rules.mak: Fix DSO build by pulling in archive symbols
[qemu.git] / exec.c
blob7dddcc8034dd43a57af62b4b042c8cfb32001b05
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
78 #endif
80 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
81 /* current CPU in the current thread. It is only valid inside
82 cpu_exec() */
83 DEFINE_TLS(CPUState *, current_cpu);
84 /* 0 = Do not count executed instructions.
85 1 = Precise instruction counting.
86 2 = Adaptive rate instruction counting. */
87 int use_icount;
89 #if !defined(CONFIG_USER_ONLY)
91 typedef struct PhysPageEntry PhysPageEntry;
93 struct PhysPageEntry {
94 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
95 uint32_t skip : 6;
96 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
97 uint32_t ptr : 26;
100 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
102 /* Size of the L2 (and L3, etc) page tables. */
103 #define ADDR_SPACE_BITS 64
105 #define P_L2_BITS 9
106 #define P_L2_SIZE (1 << P_L2_BITS)
108 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
110 typedef PhysPageEntry Node[P_L2_SIZE];
112 typedef struct PhysPageMap {
113 unsigned sections_nb;
114 unsigned sections_nb_alloc;
115 unsigned nodes_nb;
116 unsigned nodes_nb_alloc;
117 Node *nodes;
118 MemoryRegionSection *sections;
119 } PhysPageMap;
121 struct AddressSpaceDispatch {
122 /* This is a multi-level map on the physical address space.
123 * The bottom level has pointers to MemoryRegionSections.
125 PhysPageEntry phys_map;
126 PhysPageMap map;
127 AddressSpace *as;
130 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
131 typedef struct subpage_t {
132 MemoryRegion iomem;
133 AddressSpace *as;
134 hwaddr base;
135 uint16_t sub_section[TARGET_PAGE_SIZE];
136 } subpage_t;
138 #define PHYS_SECTION_UNASSIGNED 0
139 #define PHYS_SECTION_NOTDIRTY 1
140 #define PHYS_SECTION_ROM 2
141 #define PHYS_SECTION_WATCH 3
143 static void io_mem_init(void);
144 static void memory_map_init(void);
145 static void tcg_commit(MemoryListener *listener);
147 static MemoryRegion io_mem_watch;
148 #endif
150 #if !defined(CONFIG_USER_ONLY)
152 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
154 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
155 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
156 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
157 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
161 static uint32_t phys_map_node_alloc(PhysPageMap *map)
163 unsigned i;
164 uint32_t ret;
166 ret = map->nodes_nb++;
167 assert(ret != PHYS_MAP_NODE_NIL);
168 assert(ret != map->nodes_nb_alloc);
169 for (i = 0; i < P_L2_SIZE; ++i) {
170 map->nodes[ret][i].skip = 1;
171 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
173 return ret;
176 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
177 hwaddr *index, hwaddr *nb, uint16_t leaf,
178 int level)
180 PhysPageEntry *p;
181 int i;
182 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
184 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
185 lp->ptr = phys_map_node_alloc(map);
186 p = map->nodes[lp->ptr];
187 if (level == 0) {
188 for (i = 0; i < P_L2_SIZE; i++) {
189 p[i].skip = 0;
190 p[i].ptr = PHYS_SECTION_UNASSIGNED;
193 } else {
194 p = map->nodes[lp->ptr];
196 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
198 while (*nb && lp < &p[P_L2_SIZE]) {
199 if ((*index & (step - 1)) == 0 && *nb >= step) {
200 lp->skip = 0;
201 lp->ptr = leaf;
202 *index += step;
203 *nb -= step;
204 } else {
205 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
207 ++lp;
211 static void phys_page_set(AddressSpaceDispatch *d,
212 hwaddr index, hwaddr nb,
213 uint16_t leaf)
215 /* Wildly overreserve - it doesn't matter much. */
216 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
218 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
221 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
222 * and update our entry so we can skip it and go directly to the destination.
224 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
226 unsigned valid_ptr = P_L2_SIZE;
227 int valid = 0;
228 PhysPageEntry *p;
229 int i;
231 if (lp->ptr == PHYS_MAP_NODE_NIL) {
232 return;
235 p = nodes[lp->ptr];
236 for (i = 0; i < P_L2_SIZE; i++) {
237 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
238 continue;
241 valid_ptr = i;
242 valid++;
243 if (p[i].skip) {
244 phys_page_compact(&p[i], nodes, compacted);
248 /* We can only compress if there's only one child. */
249 if (valid != 1) {
250 return;
253 assert(valid_ptr < P_L2_SIZE);
255 /* Don't compress if it won't fit in the # of bits we have. */
256 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
257 return;
260 lp->ptr = p[valid_ptr].ptr;
261 if (!p[valid_ptr].skip) {
262 /* If our only child is a leaf, make this a leaf. */
263 /* By design, we should have made this node a leaf to begin with so we
264 * should never reach here.
265 * But since it's so simple to handle this, let's do it just in case we
266 * change this rule.
268 lp->skip = 0;
269 } else {
270 lp->skip += p[valid_ptr].skip;
274 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
276 DECLARE_BITMAP(compacted, nodes_nb);
278 if (d->phys_map.skip) {
279 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
283 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
284 Node *nodes, MemoryRegionSection *sections)
286 PhysPageEntry *p;
287 hwaddr index = addr >> TARGET_PAGE_BITS;
288 int i;
290 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
291 if (lp.ptr == PHYS_MAP_NODE_NIL) {
292 return &sections[PHYS_SECTION_UNASSIGNED];
294 p = nodes[lp.ptr];
295 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
298 if (sections[lp.ptr].size.hi ||
299 range_covers_byte(sections[lp.ptr].offset_within_address_space,
300 sections[lp.ptr].size.lo, addr)) {
301 return &sections[lp.ptr];
302 } else {
303 return &sections[PHYS_SECTION_UNASSIGNED];
307 bool memory_region_is_unassigned(MemoryRegion *mr)
309 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
310 && mr != &io_mem_watch;
313 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
314 hwaddr addr,
315 bool resolve_subpage)
317 MemoryRegionSection *section;
318 subpage_t *subpage;
320 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
321 if (resolve_subpage && section->mr->subpage) {
322 subpage = container_of(section->mr, subpage_t, iomem);
323 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
325 return section;
328 static MemoryRegionSection *
329 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
330 hwaddr *plen, bool resolve_subpage)
332 MemoryRegionSection *section;
333 Int128 diff;
335 section = address_space_lookup_region(d, addr, resolve_subpage);
336 /* Compute offset within MemoryRegionSection */
337 addr -= section->offset_within_address_space;
339 /* Compute offset within MemoryRegion */
340 *xlat = addr + section->offset_within_region;
342 diff = int128_sub(section->mr->size, int128_make64(addr));
343 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
344 return section;
347 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
349 if (memory_region_is_ram(mr)) {
350 return !(is_write && mr->readonly);
352 if (memory_region_is_romd(mr)) {
353 return !is_write;
356 return false;
359 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
360 hwaddr *xlat, hwaddr *plen,
361 bool is_write)
363 IOMMUTLBEntry iotlb;
364 MemoryRegionSection *section;
365 MemoryRegion *mr;
366 hwaddr len = *plen;
368 for (;;) {
369 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
370 mr = section->mr;
372 if (!mr->iommu_ops) {
373 break;
376 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
377 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
378 | (addr & iotlb.addr_mask));
379 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
380 if (!(iotlb.perm & (1 << is_write))) {
381 mr = &io_mem_unassigned;
382 break;
385 as = iotlb.target_as;
388 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
389 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
390 len = MIN(page, len);
393 *plen = len;
394 *xlat = addr;
395 return mr;
398 MemoryRegionSection *
399 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
400 hwaddr *plen)
402 MemoryRegionSection *section;
403 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
405 assert(!section->mr->iommu_ops);
406 return section;
408 #endif
410 void cpu_exec_init_all(void)
412 #if !defined(CONFIG_USER_ONLY)
413 qemu_mutex_init(&ram_list.mutex);
414 memory_map_init();
415 io_mem_init();
416 #endif
419 #if !defined(CONFIG_USER_ONLY)
421 static int cpu_common_post_load(void *opaque, int version_id)
423 CPUState *cpu = opaque;
425 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
426 version_id is increased. */
427 cpu->interrupt_request &= ~0x01;
428 tlb_flush(cpu, 1);
430 return 0;
433 static int cpu_common_pre_load(void *opaque)
435 CPUState *cpu = opaque;
437 cpu->exception_index = 0;
439 return 0;
442 static bool cpu_common_exception_index_needed(void *opaque)
444 CPUState *cpu = opaque;
446 return cpu->exception_index != 0;
449 static const VMStateDescription vmstate_cpu_common_exception_index = {
450 .name = "cpu_common/exception_index",
451 .version_id = 1,
452 .minimum_version_id = 1,
453 .fields = (VMStateField[]) {
454 VMSTATE_INT32(exception_index, CPUState),
455 VMSTATE_END_OF_LIST()
459 const VMStateDescription vmstate_cpu_common = {
460 .name = "cpu_common",
461 .version_id = 1,
462 .minimum_version_id = 1,
463 .pre_load = cpu_common_pre_load,
464 .post_load = cpu_common_post_load,
465 .fields = (VMStateField[]) {
466 VMSTATE_UINT32(halted, CPUState),
467 VMSTATE_UINT32(interrupt_request, CPUState),
468 VMSTATE_END_OF_LIST()
470 .subsections = (VMStateSubsection[]) {
472 .vmsd = &vmstate_cpu_common_exception_index,
473 .needed = cpu_common_exception_index_needed,
474 } , {
475 /* empty */
480 #endif
482 CPUState *qemu_get_cpu(int index)
484 CPUState *cpu;
486 CPU_FOREACH(cpu) {
487 if (cpu->cpu_index == index) {
488 return cpu;
492 return NULL;
495 #if !defined(CONFIG_USER_ONLY)
496 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
498 /* We only support one address space per cpu at the moment. */
499 assert(cpu->as == as);
501 if (cpu->tcg_as_listener) {
502 memory_listener_unregister(cpu->tcg_as_listener);
503 } else {
504 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
506 cpu->tcg_as_listener->commit = tcg_commit;
507 memory_listener_register(cpu->tcg_as_listener, as);
509 #endif
511 void cpu_exec_init(CPUArchState *env)
513 CPUState *cpu = ENV_GET_CPU(env);
514 CPUClass *cc = CPU_GET_CLASS(cpu);
515 CPUState *some_cpu;
516 int cpu_index;
518 #if defined(CONFIG_USER_ONLY)
519 cpu_list_lock();
520 #endif
521 cpu_index = 0;
522 CPU_FOREACH(some_cpu) {
523 cpu_index++;
525 cpu->cpu_index = cpu_index;
526 cpu->numa_node = 0;
527 QTAILQ_INIT(&cpu->breakpoints);
528 QTAILQ_INIT(&cpu->watchpoints);
529 #ifndef CONFIG_USER_ONLY
530 cpu->as = &address_space_memory;
531 cpu->thread_id = qemu_get_thread_id();
532 #endif
533 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
534 #if defined(CONFIG_USER_ONLY)
535 cpu_list_unlock();
536 #endif
537 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
538 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
540 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
541 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
542 cpu_save, cpu_load, env);
543 assert(cc->vmsd == NULL);
544 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
545 #endif
546 if (cc->vmsd != NULL) {
547 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
551 #if defined(TARGET_HAS_ICE)
552 #if defined(CONFIG_USER_ONLY)
553 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
555 tb_invalidate_phys_page_range(pc, pc + 1, 0);
557 #else
558 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
560 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
561 if (phys != -1) {
562 tb_invalidate_phys_addr(cpu->as,
563 phys | (pc & ~TARGET_PAGE_MASK));
566 #endif
567 #endif /* TARGET_HAS_ICE */
569 #if defined(CONFIG_USER_ONLY)
570 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
575 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
576 int flags, CPUWatchpoint **watchpoint)
578 return -ENOSYS;
580 #else
581 /* Add a watchpoint. */
582 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
583 int flags, CPUWatchpoint **watchpoint)
585 vaddr len_mask = ~(len - 1);
586 CPUWatchpoint *wp;
588 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
589 if ((len & (len - 1)) || (addr & ~len_mask) ||
590 len == 0 || len > TARGET_PAGE_SIZE) {
591 error_report("tried to set invalid watchpoint at %"
592 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
593 return -EINVAL;
595 wp = g_malloc(sizeof(*wp));
597 wp->vaddr = addr;
598 wp->len_mask = len_mask;
599 wp->flags = flags;
601 /* keep all GDB-injected watchpoints in front */
602 if (flags & BP_GDB) {
603 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
604 } else {
605 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
608 tlb_flush_page(cpu, addr);
610 if (watchpoint)
611 *watchpoint = wp;
612 return 0;
615 /* Remove a specific watchpoint. */
616 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
617 int flags)
619 vaddr len_mask = ~(len - 1);
620 CPUWatchpoint *wp;
622 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
623 if (addr == wp->vaddr && len_mask == wp->len_mask
624 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
625 cpu_watchpoint_remove_by_ref(cpu, wp);
626 return 0;
629 return -ENOENT;
632 /* Remove a specific watchpoint by reference. */
633 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
635 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
637 tlb_flush_page(cpu, watchpoint->vaddr);
639 g_free(watchpoint);
642 /* Remove all matching watchpoints. */
643 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
645 CPUWatchpoint *wp, *next;
647 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
648 if (wp->flags & mask) {
649 cpu_watchpoint_remove_by_ref(cpu, wp);
653 #endif
655 /* Add a breakpoint. */
656 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
657 CPUBreakpoint **breakpoint)
659 #if defined(TARGET_HAS_ICE)
660 CPUBreakpoint *bp;
662 bp = g_malloc(sizeof(*bp));
664 bp->pc = pc;
665 bp->flags = flags;
667 /* keep all GDB-injected breakpoints in front */
668 if (flags & BP_GDB) {
669 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
670 } else {
671 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
674 breakpoint_invalidate(cpu, pc);
676 if (breakpoint) {
677 *breakpoint = bp;
679 return 0;
680 #else
681 return -ENOSYS;
682 #endif
685 /* Remove a specific breakpoint. */
686 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
688 #if defined(TARGET_HAS_ICE)
689 CPUBreakpoint *bp;
691 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
692 if (bp->pc == pc && bp->flags == flags) {
693 cpu_breakpoint_remove_by_ref(cpu, bp);
694 return 0;
697 return -ENOENT;
698 #else
699 return -ENOSYS;
700 #endif
703 /* Remove a specific breakpoint by reference. */
704 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
706 #if defined(TARGET_HAS_ICE)
707 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
709 breakpoint_invalidate(cpu, breakpoint->pc);
711 g_free(breakpoint);
712 #endif
715 /* Remove all matching breakpoints. */
716 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
718 #if defined(TARGET_HAS_ICE)
719 CPUBreakpoint *bp, *next;
721 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
722 if (bp->flags & mask) {
723 cpu_breakpoint_remove_by_ref(cpu, bp);
726 #endif
729 /* enable or disable single step mode. EXCP_DEBUG is returned by the
730 CPU loop after each instruction */
731 void cpu_single_step(CPUState *cpu, int enabled)
733 #if defined(TARGET_HAS_ICE)
734 if (cpu->singlestep_enabled != enabled) {
735 cpu->singlestep_enabled = enabled;
736 if (kvm_enabled()) {
737 kvm_update_guest_debug(cpu, 0);
738 } else {
739 /* must flush all the translated code to avoid inconsistencies */
740 /* XXX: only flush what is necessary */
741 CPUArchState *env = cpu->env_ptr;
742 tb_flush(env);
745 #endif
748 void cpu_abort(CPUState *cpu, const char *fmt, ...)
750 va_list ap;
751 va_list ap2;
753 va_start(ap, fmt);
754 va_copy(ap2, ap);
755 fprintf(stderr, "qemu: fatal: ");
756 vfprintf(stderr, fmt, ap);
757 fprintf(stderr, "\n");
758 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
759 if (qemu_log_enabled()) {
760 qemu_log("qemu: fatal: ");
761 qemu_log_vprintf(fmt, ap2);
762 qemu_log("\n");
763 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
764 qemu_log_flush();
765 qemu_log_close();
767 va_end(ap2);
768 va_end(ap);
769 #if defined(CONFIG_USER_ONLY)
771 struct sigaction act;
772 sigfillset(&act.sa_mask);
773 act.sa_handler = SIG_DFL;
774 sigaction(SIGABRT, &act, NULL);
776 #endif
777 abort();
780 #if !defined(CONFIG_USER_ONLY)
781 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
783 RAMBlock *block;
785 /* The list is protected by the iothread lock here. */
786 block = ram_list.mru_block;
787 if (block && addr - block->offset < block->length) {
788 goto found;
790 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
791 if (addr - block->offset < block->length) {
792 goto found;
796 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
797 abort();
799 found:
800 ram_list.mru_block = block;
801 return block;
804 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
806 ram_addr_t start1;
807 RAMBlock *block;
808 ram_addr_t end;
810 end = TARGET_PAGE_ALIGN(start + length);
811 start &= TARGET_PAGE_MASK;
813 block = qemu_get_ram_block(start);
814 assert(block == qemu_get_ram_block(end - 1));
815 start1 = (uintptr_t)block->host + (start - block->offset);
816 cpu_tlb_reset_dirty_all(start1, length);
819 /* Note: start and end must be within the same ram block. */
820 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
821 unsigned client)
823 if (length == 0)
824 return;
825 cpu_physical_memory_clear_dirty_range(start, length, client);
827 if (tcg_enabled()) {
828 tlb_reset_dirty_range_all(start, length);
832 static void cpu_physical_memory_set_dirty_tracking(bool enable)
834 in_migration = enable;
837 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
838 MemoryRegionSection *section,
839 target_ulong vaddr,
840 hwaddr paddr, hwaddr xlat,
841 int prot,
842 target_ulong *address)
844 hwaddr iotlb;
845 CPUWatchpoint *wp;
847 if (memory_region_is_ram(section->mr)) {
848 /* Normal RAM. */
849 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
850 + xlat;
851 if (!section->readonly) {
852 iotlb |= PHYS_SECTION_NOTDIRTY;
853 } else {
854 iotlb |= PHYS_SECTION_ROM;
856 } else {
857 iotlb = section - section->address_space->dispatch->map.sections;
858 iotlb += xlat;
861 /* Make accesses to pages with watchpoints go via the
862 watchpoint trap routines. */
863 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
864 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
865 /* Avoid trapping reads of pages with a write breakpoint. */
866 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
867 iotlb = PHYS_SECTION_WATCH + paddr;
868 *address |= TLB_MMIO;
869 break;
874 return iotlb;
876 #endif /* defined(CONFIG_USER_ONLY) */
878 #if !defined(CONFIG_USER_ONLY)
880 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
881 uint16_t section);
882 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
884 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
887 * Set a custom physical guest memory allocator.
888 * Accelerators with unusual needs may need this. Hopefully, we can
889 * get rid of it eventually.
891 void phys_mem_set_alloc(void *(*alloc)(size_t))
893 phys_mem_alloc = alloc;
896 static uint16_t phys_section_add(PhysPageMap *map,
897 MemoryRegionSection *section)
899 /* The physical section number is ORed with a page-aligned
900 * pointer to produce the iotlb entries. Thus it should
901 * never overflow into the page-aligned value.
903 assert(map->sections_nb < TARGET_PAGE_SIZE);
905 if (map->sections_nb == map->sections_nb_alloc) {
906 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
907 map->sections = g_renew(MemoryRegionSection, map->sections,
908 map->sections_nb_alloc);
910 map->sections[map->sections_nb] = *section;
911 memory_region_ref(section->mr);
912 return map->sections_nb++;
915 static void phys_section_destroy(MemoryRegion *mr)
917 memory_region_unref(mr);
919 if (mr->subpage) {
920 subpage_t *subpage = container_of(mr, subpage_t, iomem);
921 object_unref(OBJECT(&subpage->iomem));
922 g_free(subpage);
926 static void phys_sections_free(PhysPageMap *map)
928 while (map->sections_nb > 0) {
929 MemoryRegionSection *section = &map->sections[--map->sections_nb];
930 phys_section_destroy(section->mr);
932 g_free(map->sections);
933 g_free(map->nodes);
936 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
938 subpage_t *subpage;
939 hwaddr base = section->offset_within_address_space
940 & TARGET_PAGE_MASK;
941 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
942 d->map.nodes, d->map.sections);
943 MemoryRegionSection subsection = {
944 .offset_within_address_space = base,
945 .size = int128_make64(TARGET_PAGE_SIZE),
947 hwaddr start, end;
949 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
951 if (!(existing->mr->subpage)) {
952 subpage = subpage_init(d->as, base);
953 subsection.address_space = d->as;
954 subsection.mr = &subpage->iomem;
955 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
956 phys_section_add(&d->map, &subsection));
957 } else {
958 subpage = container_of(existing->mr, subpage_t, iomem);
960 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
961 end = start + int128_get64(section->size) - 1;
962 subpage_register(subpage, start, end,
963 phys_section_add(&d->map, section));
967 static void register_multipage(AddressSpaceDispatch *d,
968 MemoryRegionSection *section)
970 hwaddr start_addr = section->offset_within_address_space;
971 uint16_t section_index = phys_section_add(&d->map, section);
972 uint64_t num_pages = int128_get64(int128_rshift(section->size,
973 TARGET_PAGE_BITS));
975 assert(num_pages);
976 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
979 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
981 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
982 AddressSpaceDispatch *d = as->next_dispatch;
983 MemoryRegionSection now = *section, remain = *section;
984 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
986 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
987 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
988 - now.offset_within_address_space;
990 now.size = int128_min(int128_make64(left), now.size);
991 register_subpage(d, &now);
992 } else {
993 now.size = int128_zero();
995 while (int128_ne(remain.size, now.size)) {
996 remain.size = int128_sub(remain.size, now.size);
997 remain.offset_within_address_space += int128_get64(now.size);
998 remain.offset_within_region += int128_get64(now.size);
999 now = remain;
1000 if (int128_lt(remain.size, page_size)) {
1001 register_subpage(d, &now);
1002 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1003 now.size = page_size;
1004 register_subpage(d, &now);
1005 } else {
1006 now.size = int128_and(now.size, int128_neg(page_size));
1007 register_multipage(d, &now);
1012 void qemu_flush_coalesced_mmio_buffer(void)
1014 if (kvm_enabled())
1015 kvm_flush_coalesced_mmio_buffer();
1018 void qemu_mutex_lock_ramlist(void)
1020 qemu_mutex_lock(&ram_list.mutex);
1023 void qemu_mutex_unlock_ramlist(void)
1025 qemu_mutex_unlock(&ram_list.mutex);
1028 #ifdef __linux__
1030 #include <sys/vfs.h>
1032 #define HUGETLBFS_MAGIC 0x958458f6
1034 static long gethugepagesize(const char *path)
1036 struct statfs fs;
1037 int ret;
1039 do {
1040 ret = statfs(path, &fs);
1041 } while (ret != 0 && errno == EINTR);
1043 if (ret != 0) {
1044 perror(path);
1045 return 0;
1048 if (fs.f_type != HUGETLBFS_MAGIC)
1049 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1051 return fs.f_bsize;
1054 static void *file_ram_alloc(RAMBlock *block,
1055 ram_addr_t memory,
1056 const char *path,
1057 Error **errp)
1059 char *filename;
1060 char *sanitized_name;
1061 char *c;
1062 void *area;
1063 int fd;
1064 unsigned long hpagesize;
1066 hpagesize = gethugepagesize(path);
1067 if (!hpagesize) {
1068 goto error;
1071 if (memory < hpagesize) {
1072 return NULL;
1075 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1076 error_setg(errp,
1077 "host lacks kvm mmu notifiers, -mem-path unsupported");
1078 goto error;
1081 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1082 sanitized_name = g_strdup(memory_region_name(block->mr));
1083 for (c = sanitized_name; *c != '\0'; c++) {
1084 if (*c == '/')
1085 *c = '_';
1088 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1089 sanitized_name);
1090 g_free(sanitized_name);
1092 fd = mkstemp(filename);
1093 if (fd < 0) {
1094 error_setg_errno(errp, errno,
1095 "unable to create backing store for hugepages");
1096 g_free(filename);
1097 goto error;
1099 unlink(filename);
1100 g_free(filename);
1102 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1105 * ftruncate is not supported by hugetlbfs in older
1106 * hosts, so don't bother bailing out on errors.
1107 * If anything goes wrong with it under other filesystems,
1108 * mmap will fail.
1110 if (ftruncate(fd, memory)) {
1111 perror("ftruncate");
1114 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1115 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1116 fd, 0);
1117 if (area == MAP_FAILED) {
1118 error_setg_errno(errp, errno,
1119 "unable to map backing store for hugepages");
1120 close(fd);
1121 goto error;
1124 if (mem_prealloc) {
1125 os_mem_prealloc(fd, area, memory);
1128 block->fd = fd;
1129 return area;
1131 error:
1132 if (mem_prealloc) {
1133 exit(1);
1135 return NULL;
1137 #endif
1139 static ram_addr_t find_ram_offset(ram_addr_t size)
1141 RAMBlock *block, *next_block;
1142 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1144 assert(size != 0); /* it would hand out same offset multiple times */
1146 if (QTAILQ_EMPTY(&ram_list.blocks))
1147 return 0;
1149 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1150 ram_addr_t end, next = RAM_ADDR_MAX;
1152 end = block->offset + block->length;
1154 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1155 if (next_block->offset >= end) {
1156 next = MIN(next, next_block->offset);
1159 if (next - end >= size && next - end < mingap) {
1160 offset = end;
1161 mingap = next - end;
1165 if (offset == RAM_ADDR_MAX) {
1166 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1167 (uint64_t)size);
1168 abort();
1171 return offset;
1174 ram_addr_t last_ram_offset(void)
1176 RAMBlock *block;
1177 ram_addr_t last = 0;
1179 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1180 last = MAX(last, block->offset + block->length);
1182 return last;
1185 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1187 int ret;
1189 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1190 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1191 "dump-guest-core", true)) {
1192 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1193 if (ret) {
1194 perror("qemu_madvise");
1195 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1196 "but dump_guest_core=off specified\n");
1201 static RAMBlock *find_ram_block(ram_addr_t addr)
1203 RAMBlock *block;
1205 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1206 if (block->offset == addr) {
1207 return block;
1211 return NULL;
1214 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1216 RAMBlock *new_block = find_ram_block(addr);
1217 RAMBlock *block;
1219 assert(new_block);
1220 assert(!new_block->idstr[0]);
1222 if (dev) {
1223 char *id = qdev_get_dev_path(dev);
1224 if (id) {
1225 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1226 g_free(id);
1229 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1231 /* This assumes the iothread lock is taken here too. */
1232 qemu_mutex_lock_ramlist();
1233 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1234 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1235 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1236 new_block->idstr);
1237 abort();
1240 qemu_mutex_unlock_ramlist();
1243 void qemu_ram_unset_idstr(ram_addr_t addr)
1245 RAMBlock *block = find_ram_block(addr);
1247 if (block) {
1248 memset(block->idstr, 0, sizeof(block->idstr));
1252 static int memory_try_enable_merging(void *addr, size_t len)
1254 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1255 /* disabled by the user */
1256 return 0;
1259 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1262 static ram_addr_t ram_block_add(RAMBlock *new_block)
1264 RAMBlock *block;
1265 ram_addr_t old_ram_size, new_ram_size;
1267 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1269 /* This assumes the iothread lock is taken here too. */
1270 qemu_mutex_lock_ramlist();
1271 new_block->offset = find_ram_offset(new_block->length);
1273 if (!new_block->host) {
1274 if (xen_enabled()) {
1275 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1276 } else {
1277 new_block->host = phys_mem_alloc(new_block->length);
1278 if (!new_block->host) {
1279 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1280 memory_region_name(new_block->mr), strerror(errno));
1281 exit(1);
1283 memory_try_enable_merging(new_block->host, new_block->length);
1287 /* Keep the list sorted from biggest to smallest block. */
1288 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1289 if (block->length < new_block->length) {
1290 break;
1293 if (block) {
1294 QTAILQ_INSERT_BEFORE(block, new_block, next);
1295 } else {
1296 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1298 ram_list.mru_block = NULL;
1300 ram_list.version++;
1301 qemu_mutex_unlock_ramlist();
1303 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1305 if (new_ram_size > old_ram_size) {
1306 int i;
1307 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1308 ram_list.dirty_memory[i] =
1309 bitmap_zero_extend(ram_list.dirty_memory[i],
1310 old_ram_size, new_ram_size);
1313 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1315 qemu_ram_setup_dump(new_block->host, new_block->length);
1316 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1317 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1319 if (kvm_enabled()) {
1320 kvm_setup_guest_memory(new_block->host, new_block->length);
1323 return new_block->offset;
1326 #ifdef __linux__
1327 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1328 bool share, const char *mem_path,
1329 Error **errp)
1331 RAMBlock *new_block;
1333 if (xen_enabled()) {
1334 error_setg(errp, "-mem-path not supported with Xen");
1335 return -1;
1338 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1340 * file_ram_alloc() needs to allocate just like
1341 * phys_mem_alloc, but we haven't bothered to provide
1342 * a hook there.
1344 error_setg(errp,
1345 "-mem-path not supported with this accelerator");
1346 return -1;
1349 size = TARGET_PAGE_ALIGN(size);
1350 new_block = g_malloc0(sizeof(*new_block));
1351 new_block->mr = mr;
1352 new_block->length = size;
1353 new_block->flags = share ? RAM_SHARED : 0;
1354 new_block->host = file_ram_alloc(new_block, size,
1355 mem_path, errp);
1356 if (!new_block->host) {
1357 g_free(new_block);
1358 return -1;
1361 return ram_block_add(new_block);
1363 #endif
1365 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1366 MemoryRegion *mr)
1368 RAMBlock *new_block;
1370 size = TARGET_PAGE_ALIGN(size);
1371 new_block = g_malloc0(sizeof(*new_block));
1372 new_block->mr = mr;
1373 new_block->length = size;
1374 new_block->fd = -1;
1375 new_block->host = host;
1376 if (host) {
1377 new_block->flags |= RAM_PREALLOC;
1379 return ram_block_add(new_block);
1382 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1384 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1387 void qemu_ram_free_from_ptr(ram_addr_t addr)
1389 RAMBlock *block;
1391 /* This assumes the iothread lock is taken here too. */
1392 qemu_mutex_lock_ramlist();
1393 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1394 if (addr == block->offset) {
1395 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1396 ram_list.mru_block = NULL;
1397 ram_list.version++;
1398 g_free(block);
1399 break;
1402 qemu_mutex_unlock_ramlist();
1405 void qemu_ram_free(ram_addr_t addr)
1407 RAMBlock *block;
1409 /* This assumes the iothread lock is taken here too. */
1410 qemu_mutex_lock_ramlist();
1411 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1412 if (addr == block->offset) {
1413 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1414 ram_list.mru_block = NULL;
1415 ram_list.version++;
1416 if (block->flags & RAM_PREALLOC) {
1418 } else if (xen_enabled()) {
1419 xen_invalidate_map_cache_entry(block->host);
1420 #ifndef _WIN32
1421 } else if (block->fd >= 0) {
1422 munmap(block->host, block->length);
1423 close(block->fd);
1424 #endif
1425 } else {
1426 qemu_anon_ram_free(block->host, block->length);
1428 g_free(block);
1429 break;
1432 qemu_mutex_unlock_ramlist();
1436 #ifndef _WIN32
1437 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1439 RAMBlock *block;
1440 ram_addr_t offset;
1441 int flags;
1442 void *area, *vaddr;
1444 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1445 offset = addr - block->offset;
1446 if (offset < block->length) {
1447 vaddr = block->host + offset;
1448 if (block->flags & RAM_PREALLOC) {
1450 } else if (xen_enabled()) {
1451 abort();
1452 } else {
1453 flags = MAP_FIXED;
1454 munmap(vaddr, length);
1455 if (block->fd >= 0) {
1456 flags |= (block->flags & RAM_SHARED ?
1457 MAP_SHARED : MAP_PRIVATE);
1458 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1459 flags, block->fd, offset);
1460 } else {
1462 * Remap needs to match alloc. Accelerators that
1463 * set phys_mem_alloc never remap. If they did,
1464 * we'd need a remap hook here.
1466 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1468 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1469 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1470 flags, -1, 0);
1472 if (area != vaddr) {
1473 fprintf(stderr, "Could not remap addr: "
1474 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1475 length, addr);
1476 exit(1);
1478 memory_try_enable_merging(vaddr, length);
1479 qemu_ram_setup_dump(vaddr, length);
1481 return;
1485 #endif /* !_WIN32 */
1487 int qemu_get_ram_fd(ram_addr_t addr)
1489 RAMBlock *block = qemu_get_ram_block(addr);
1491 return block->fd;
1494 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1496 RAMBlock *block = qemu_get_ram_block(addr);
1498 return block->host;
1501 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1502 With the exception of the softmmu code in this file, this should
1503 only be used for local memory (e.g. video ram) that the device owns,
1504 and knows it isn't going to access beyond the end of the block.
1506 It should not be used for general purpose DMA.
1507 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1509 void *qemu_get_ram_ptr(ram_addr_t addr)
1511 RAMBlock *block = qemu_get_ram_block(addr);
1513 if (xen_enabled()) {
1514 /* We need to check if the requested address is in the RAM
1515 * because we don't want to map the entire memory in QEMU.
1516 * In that case just map until the end of the page.
1518 if (block->offset == 0) {
1519 return xen_map_cache(addr, 0, 0);
1520 } else if (block->host == NULL) {
1521 block->host =
1522 xen_map_cache(block->offset, block->length, 1);
1525 return block->host + (addr - block->offset);
1528 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1529 * but takes a size argument */
1530 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1532 if (*size == 0) {
1533 return NULL;
1535 if (xen_enabled()) {
1536 return xen_map_cache(addr, *size, 1);
1537 } else {
1538 RAMBlock *block;
1540 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1541 if (addr - block->offset < block->length) {
1542 if (addr - block->offset + *size > block->length)
1543 *size = block->length - addr + block->offset;
1544 return block->host + (addr - block->offset);
1548 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1549 abort();
1553 /* Some of the softmmu routines need to translate from a host pointer
1554 (typically a TLB entry) back to a ram offset. */
1555 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1557 RAMBlock *block;
1558 uint8_t *host = ptr;
1560 if (xen_enabled()) {
1561 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1562 return qemu_get_ram_block(*ram_addr)->mr;
1565 block = ram_list.mru_block;
1566 if (block && block->host && host - block->host < block->length) {
1567 goto found;
1570 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1571 /* This case append when the block is not mapped. */
1572 if (block->host == NULL) {
1573 continue;
1575 if (host - block->host < block->length) {
1576 goto found;
1580 return NULL;
1582 found:
1583 *ram_addr = block->offset + (host - block->host);
1584 return block->mr;
1587 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1588 uint64_t val, unsigned size)
1590 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1591 tb_invalidate_phys_page_fast(ram_addr, size);
1593 switch (size) {
1594 case 1:
1595 stb_p(qemu_get_ram_ptr(ram_addr), val);
1596 break;
1597 case 2:
1598 stw_p(qemu_get_ram_ptr(ram_addr), val);
1599 break;
1600 case 4:
1601 stl_p(qemu_get_ram_ptr(ram_addr), val);
1602 break;
1603 default:
1604 abort();
1606 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1607 /* we remove the notdirty callback only if the code has been
1608 flushed */
1609 if (!cpu_physical_memory_is_clean(ram_addr)) {
1610 CPUArchState *env = current_cpu->env_ptr;
1611 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1615 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1616 unsigned size, bool is_write)
1618 return is_write;
1621 static const MemoryRegionOps notdirty_mem_ops = {
1622 .write = notdirty_mem_write,
1623 .valid.accepts = notdirty_mem_accepts,
1624 .endianness = DEVICE_NATIVE_ENDIAN,
1627 /* Generate a debug exception if a watchpoint has been hit. */
1628 static void check_watchpoint(int offset, int len_mask, int flags)
1630 CPUState *cpu = current_cpu;
1631 CPUArchState *env = cpu->env_ptr;
1632 target_ulong pc, cs_base;
1633 target_ulong vaddr;
1634 CPUWatchpoint *wp;
1635 int cpu_flags;
1637 if (cpu->watchpoint_hit) {
1638 /* We re-entered the check after replacing the TB. Now raise
1639 * the debug interrupt so that is will trigger after the
1640 * current instruction. */
1641 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1642 return;
1644 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1645 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1646 if ((vaddr == (wp->vaddr & len_mask) ||
1647 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1648 wp->flags |= BP_WATCHPOINT_HIT;
1649 if (!cpu->watchpoint_hit) {
1650 cpu->watchpoint_hit = wp;
1651 tb_check_watchpoint(cpu);
1652 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1653 cpu->exception_index = EXCP_DEBUG;
1654 cpu_loop_exit(cpu);
1655 } else {
1656 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1657 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1658 cpu_resume_from_signal(cpu, NULL);
1661 } else {
1662 wp->flags &= ~BP_WATCHPOINT_HIT;
1667 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1668 so these check for a hit then pass through to the normal out-of-line
1669 phys routines. */
1670 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1671 unsigned size)
1673 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1674 switch (size) {
1675 case 1: return ldub_phys(&address_space_memory, addr);
1676 case 2: return lduw_phys(&address_space_memory, addr);
1677 case 4: return ldl_phys(&address_space_memory, addr);
1678 default: abort();
1682 static void watch_mem_write(void *opaque, hwaddr addr,
1683 uint64_t val, unsigned size)
1685 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1686 switch (size) {
1687 case 1:
1688 stb_phys(&address_space_memory, addr, val);
1689 break;
1690 case 2:
1691 stw_phys(&address_space_memory, addr, val);
1692 break;
1693 case 4:
1694 stl_phys(&address_space_memory, addr, val);
1695 break;
1696 default: abort();
1700 static const MemoryRegionOps watch_mem_ops = {
1701 .read = watch_mem_read,
1702 .write = watch_mem_write,
1703 .endianness = DEVICE_NATIVE_ENDIAN,
1706 static uint64_t subpage_read(void *opaque, hwaddr addr,
1707 unsigned len)
1709 subpage_t *subpage = opaque;
1710 uint8_t buf[4];
1712 #if defined(DEBUG_SUBPAGE)
1713 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1714 subpage, len, addr);
1715 #endif
1716 address_space_read(subpage->as, addr + subpage->base, buf, len);
1717 switch (len) {
1718 case 1:
1719 return ldub_p(buf);
1720 case 2:
1721 return lduw_p(buf);
1722 case 4:
1723 return ldl_p(buf);
1724 default:
1725 abort();
1729 static void subpage_write(void *opaque, hwaddr addr,
1730 uint64_t value, unsigned len)
1732 subpage_t *subpage = opaque;
1733 uint8_t buf[4];
1735 #if defined(DEBUG_SUBPAGE)
1736 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1737 " value %"PRIx64"\n",
1738 __func__, subpage, len, addr, value);
1739 #endif
1740 switch (len) {
1741 case 1:
1742 stb_p(buf, value);
1743 break;
1744 case 2:
1745 stw_p(buf, value);
1746 break;
1747 case 4:
1748 stl_p(buf, value);
1749 break;
1750 default:
1751 abort();
1753 address_space_write(subpage->as, addr + subpage->base, buf, len);
1756 static bool subpage_accepts(void *opaque, hwaddr addr,
1757 unsigned len, bool is_write)
1759 subpage_t *subpage = opaque;
1760 #if defined(DEBUG_SUBPAGE)
1761 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1762 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1763 #endif
1765 return address_space_access_valid(subpage->as, addr + subpage->base,
1766 len, is_write);
1769 static const MemoryRegionOps subpage_ops = {
1770 .read = subpage_read,
1771 .write = subpage_write,
1772 .valid.accepts = subpage_accepts,
1773 .endianness = DEVICE_NATIVE_ENDIAN,
1776 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1777 uint16_t section)
1779 int idx, eidx;
1781 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1782 return -1;
1783 idx = SUBPAGE_IDX(start);
1784 eidx = SUBPAGE_IDX(end);
1785 #if defined(DEBUG_SUBPAGE)
1786 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1787 __func__, mmio, start, end, idx, eidx, section);
1788 #endif
1789 for (; idx <= eidx; idx++) {
1790 mmio->sub_section[idx] = section;
1793 return 0;
1796 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1798 subpage_t *mmio;
1800 mmio = g_malloc0(sizeof(subpage_t));
1802 mmio->as = as;
1803 mmio->base = base;
1804 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1805 NULL, TARGET_PAGE_SIZE);
1806 mmio->iomem.subpage = true;
1807 #if defined(DEBUG_SUBPAGE)
1808 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1809 mmio, base, TARGET_PAGE_SIZE);
1810 #endif
1811 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1813 return mmio;
1816 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1817 MemoryRegion *mr)
1819 assert(as);
1820 MemoryRegionSection section = {
1821 .address_space = as,
1822 .mr = mr,
1823 .offset_within_address_space = 0,
1824 .offset_within_region = 0,
1825 .size = int128_2_64(),
1828 return phys_section_add(map, &section);
1831 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1833 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1836 static void io_mem_init(void)
1838 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1839 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1840 NULL, UINT64_MAX);
1841 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1842 NULL, UINT64_MAX);
1843 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1844 NULL, UINT64_MAX);
1847 static void mem_begin(MemoryListener *listener)
1849 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1850 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1851 uint16_t n;
1853 n = dummy_section(&d->map, as, &io_mem_unassigned);
1854 assert(n == PHYS_SECTION_UNASSIGNED);
1855 n = dummy_section(&d->map, as, &io_mem_notdirty);
1856 assert(n == PHYS_SECTION_NOTDIRTY);
1857 n = dummy_section(&d->map, as, &io_mem_rom);
1858 assert(n == PHYS_SECTION_ROM);
1859 n = dummy_section(&d->map, as, &io_mem_watch);
1860 assert(n == PHYS_SECTION_WATCH);
1862 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1863 d->as = as;
1864 as->next_dispatch = d;
1867 static void mem_commit(MemoryListener *listener)
1869 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1870 AddressSpaceDispatch *cur = as->dispatch;
1871 AddressSpaceDispatch *next = as->next_dispatch;
1873 phys_page_compact_all(next, next->map.nodes_nb);
1875 as->dispatch = next;
1877 if (cur) {
1878 phys_sections_free(&cur->map);
1879 g_free(cur);
1883 static void tcg_commit(MemoryListener *listener)
1885 CPUState *cpu;
1887 /* since each CPU stores ram addresses in its TLB cache, we must
1888 reset the modified entries */
1889 /* XXX: slow ! */
1890 CPU_FOREACH(cpu) {
1891 /* FIXME: Disentangle the cpu.h circular files deps so we can
1892 directly get the right CPU from listener. */
1893 if (cpu->tcg_as_listener != listener) {
1894 continue;
1896 tlb_flush(cpu, 1);
1900 static void core_log_global_start(MemoryListener *listener)
1902 cpu_physical_memory_set_dirty_tracking(true);
1905 static void core_log_global_stop(MemoryListener *listener)
1907 cpu_physical_memory_set_dirty_tracking(false);
1910 static MemoryListener core_memory_listener = {
1911 .log_global_start = core_log_global_start,
1912 .log_global_stop = core_log_global_stop,
1913 .priority = 1,
1916 void address_space_init_dispatch(AddressSpace *as)
1918 as->dispatch = NULL;
1919 as->dispatch_listener = (MemoryListener) {
1920 .begin = mem_begin,
1921 .commit = mem_commit,
1922 .region_add = mem_add,
1923 .region_nop = mem_add,
1924 .priority = 0,
1926 memory_listener_register(&as->dispatch_listener, as);
1929 void address_space_destroy_dispatch(AddressSpace *as)
1931 AddressSpaceDispatch *d = as->dispatch;
1933 memory_listener_unregister(&as->dispatch_listener);
1934 g_free(d);
1935 as->dispatch = NULL;
1938 static void memory_map_init(void)
1940 system_memory = g_malloc(sizeof(*system_memory));
1942 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1943 address_space_init(&address_space_memory, system_memory, "memory");
1945 system_io = g_malloc(sizeof(*system_io));
1946 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1947 65536);
1948 address_space_init(&address_space_io, system_io, "I/O");
1950 memory_listener_register(&core_memory_listener, &address_space_memory);
1953 MemoryRegion *get_system_memory(void)
1955 return system_memory;
1958 MemoryRegion *get_system_io(void)
1960 return system_io;
1963 #endif /* !defined(CONFIG_USER_ONLY) */
1965 /* physical memory access (slow version, mainly for debug) */
1966 #if defined(CONFIG_USER_ONLY)
1967 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1968 uint8_t *buf, int len, int is_write)
1970 int l, flags;
1971 target_ulong page;
1972 void * p;
1974 while (len > 0) {
1975 page = addr & TARGET_PAGE_MASK;
1976 l = (page + TARGET_PAGE_SIZE) - addr;
1977 if (l > len)
1978 l = len;
1979 flags = page_get_flags(page);
1980 if (!(flags & PAGE_VALID))
1981 return -1;
1982 if (is_write) {
1983 if (!(flags & PAGE_WRITE))
1984 return -1;
1985 /* XXX: this code should not depend on lock_user */
1986 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1987 return -1;
1988 memcpy(p, buf, l);
1989 unlock_user(p, addr, l);
1990 } else {
1991 if (!(flags & PAGE_READ))
1992 return -1;
1993 /* XXX: this code should not depend on lock_user */
1994 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1995 return -1;
1996 memcpy(buf, p, l);
1997 unlock_user(p, addr, 0);
1999 len -= l;
2000 buf += l;
2001 addr += l;
2003 return 0;
2006 #else
2008 static void invalidate_and_set_dirty(hwaddr addr,
2009 hwaddr length)
2011 if (cpu_physical_memory_is_clean(addr)) {
2012 /* invalidate code */
2013 tb_invalidate_phys_page_range(addr, addr + length, 0);
2014 /* set dirty bit */
2015 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2017 xen_modified_memory(addr, length);
2020 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2022 unsigned access_size_max = mr->ops->valid.max_access_size;
2024 /* Regions are assumed to support 1-4 byte accesses unless
2025 otherwise specified. */
2026 if (access_size_max == 0) {
2027 access_size_max = 4;
2030 /* Bound the maximum access by the alignment of the address. */
2031 if (!mr->ops->impl.unaligned) {
2032 unsigned align_size_max = addr & -addr;
2033 if (align_size_max != 0 && align_size_max < access_size_max) {
2034 access_size_max = align_size_max;
2038 /* Don't attempt accesses larger than the maximum. */
2039 if (l > access_size_max) {
2040 l = access_size_max;
2042 if (l & (l - 1)) {
2043 l = 1 << (qemu_fls(l) - 1);
2046 return l;
2049 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2050 int len, bool is_write)
2052 hwaddr l;
2053 uint8_t *ptr;
2054 uint64_t val;
2055 hwaddr addr1;
2056 MemoryRegion *mr;
2057 bool error = false;
2059 while (len > 0) {
2060 l = len;
2061 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2063 if (is_write) {
2064 if (!memory_access_is_direct(mr, is_write)) {
2065 l = memory_access_size(mr, l, addr1);
2066 /* XXX: could force current_cpu to NULL to avoid
2067 potential bugs */
2068 switch (l) {
2069 case 8:
2070 /* 64 bit write access */
2071 val = ldq_p(buf);
2072 error |= io_mem_write(mr, addr1, val, 8);
2073 break;
2074 case 4:
2075 /* 32 bit write access */
2076 val = ldl_p(buf);
2077 error |= io_mem_write(mr, addr1, val, 4);
2078 break;
2079 case 2:
2080 /* 16 bit write access */
2081 val = lduw_p(buf);
2082 error |= io_mem_write(mr, addr1, val, 2);
2083 break;
2084 case 1:
2085 /* 8 bit write access */
2086 val = ldub_p(buf);
2087 error |= io_mem_write(mr, addr1, val, 1);
2088 break;
2089 default:
2090 abort();
2092 } else {
2093 addr1 += memory_region_get_ram_addr(mr);
2094 /* RAM case */
2095 ptr = qemu_get_ram_ptr(addr1);
2096 memcpy(ptr, buf, l);
2097 invalidate_and_set_dirty(addr1, l);
2099 } else {
2100 if (!memory_access_is_direct(mr, is_write)) {
2101 /* I/O case */
2102 l = memory_access_size(mr, l, addr1);
2103 switch (l) {
2104 case 8:
2105 /* 64 bit read access */
2106 error |= io_mem_read(mr, addr1, &val, 8);
2107 stq_p(buf, val);
2108 break;
2109 case 4:
2110 /* 32 bit read access */
2111 error |= io_mem_read(mr, addr1, &val, 4);
2112 stl_p(buf, val);
2113 break;
2114 case 2:
2115 /* 16 bit read access */
2116 error |= io_mem_read(mr, addr1, &val, 2);
2117 stw_p(buf, val);
2118 break;
2119 case 1:
2120 /* 8 bit read access */
2121 error |= io_mem_read(mr, addr1, &val, 1);
2122 stb_p(buf, val);
2123 break;
2124 default:
2125 abort();
2127 } else {
2128 /* RAM case */
2129 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2130 memcpy(buf, ptr, l);
2133 len -= l;
2134 buf += l;
2135 addr += l;
2138 return error;
2141 bool address_space_write(AddressSpace *as, hwaddr addr,
2142 const uint8_t *buf, int len)
2144 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2147 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2149 return address_space_rw(as, addr, buf, len, false);
2153 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2154 int len, int is_write)
2156 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2159 enum write_rom_type {
2160 WRITE_DATA,
2161 FLUSH_CACHE,
2164 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2165 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2167 hwaddr l;
2168 uint8_t *ptr;
2169 hwaddr addr1;
2170 MemoryRegion *mr;
2172 while (len > 0) {
2173 l = len;
2174 mr = address_space_translate(as, addr, &addr1, &l, true);
2176 if (!(memory_region_is_ram(mr) ||
2177 memory_region_is_romd(mr))) {
2178 /* do nothing */
2179 } else {
2180 addr1 += memory_region_get_ram_addr(mr);
2181 /* ROM/RAM case */
2182 ptr = qemu_get_ram_ptr(addr1);
2183 switch (type) {
2184 case WRITE_DATA:
2185 memcpy(ptr, buf, l);
2186 invalidate_and_set_dirty(addr1, l);
2187 break;
2188 case FLUSH_CACHE:
2189 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2190 break;
2193 len -= l;
2194 buf += l;
2195 addr += l;
2199 /* used for ROM loading : can write in RAM and ROM */
2200 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2201 const uint8_t *buf, int len)
2203 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2206 void cpu_flush_icache_range(hwaddr start, int len)
2209 * This function should do the same thing as an icache flush that was
2210 * triggered from within the guest. For TCG we are always cache coherent,
2211 * so there is no need to flush anything. For KVM / Xen we need to flush
2212 * the host's instruction cache at least.
2214 if (tcg_enabled()) {
2215 return;
2218 cpu_physical_memory_write_rom_internal(&address_space_memory,
2219 start, NULL, len, FLUSH_CACHE);
2222 typedef struct {
2223 MemoryRegion *mr;
2224 void *buffer;
2225 hwaddr addr;
2226 hwaddr len;
2227 } BounceBuffer;
2229 static BounceBuffer bounce;
2231 typedef struct MapClient {
2232 void *opaque;
2233 void (*callback)(void *opaque);
2234 QLIST_ENTRY(MapClient) link;
2235 } MapClient;
2237 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2238 = QLIST_HEAD_INITIALIZER(map_client_list);
2240 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2242 MapClient *client = g_malloc(sizeof(*client));
2244 client->opaque = opaque;
2245 client->callback = callback;
2246 QLIST_INSERT_HEAD(&map_client_list, client, link);
2247 return client;
2250 static void cpu_unregister_map_client(void *_client)
2252 MapClient *client = (MapClient *)_client;
2254 QLIST_REMOVE(client, link);
2255 g_free(client);
2258 static void cpu_notify_map_clients(void)
2260 MapClient *client;
2262 while (!QLIST_EMPTY(&map_client_list)) {
2263 client = QLIST_FIRST(&map_client_list);
2264 client->callback(client->opaque);
2265 cpu_unregister_map_client(client);
2269 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2271 MemoryRegion *mr;
2272 hwaddr l, xlat;
2274 while (len > 0) {
2275 l = len;
2276 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2277 if (!memory_access_is_direct(mr, is_write)) {
2278 l = memory_access_size(mr, l, addr);
2279 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2280 return false;
2284 len -= l;
2285 addr += l;
2287 return true;
2290 /* Map a physical memory region into a host virtual address.
2291 * May map a subset of the requested range, given by and returned in *plen.
2292 * May return NULL if resources needed to perform the mapping are exhausted.
2293 * Use only for reads OR writes - not for read-modify-write operations.
2294 * Use cpu_register_map_client() to know when retrying the map operation is
2295 * likely to succeed.
2297 void *address_space_map(AddressSpace *as,
2298 hwaddr addr,
2299 hwaddr *plen,
2300 bool is_write)
2302 hwaddr len = *plen;
2303 hwaddr done = 0;
2304 hwaddr l, xlat, base;
2305 MemoryRegion *mr, *this_mr;
2306 ram_addr_t raddr;
2308 if (len == 0) {
2309 return NULL;
2312 l = len;
2313 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2314 if (!memory_access_is_direct(mr, is_write)) {
2315 if (bounce.buffer) {
2316 return NULL;
2318 /* Avoid unbounded allocations */
2319 l = MIN(l, TARGET_PAGE_SIZE);
2320 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2321 bounce.addr = addr;
2322 bounce.len = l;
2324 memory_region_ref(mr);
2325 bounce.mr = mr;
2326 if (!is_write) {
2327 address_space_read(as, addr, bounce.buffer, l);
2330 *plen = l;
2331 return bounce.buffer;
2334 base = xlat;
2335 raddr = memory_region_get_ram_addr(mr);
2337 for (;;) {
2338 len -= l;
2339 addr += l;
2340 done += l;
2341 if (len == 0) {
2342 break;
2345 l = len;
2346 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2347 if (this_mr != mr || xlat != base + done) {
2348 break;
2352 memory_region_ref(mr);
2353 *plen = done;
2354 return qemu_ram_ptr_length(raddr + base, plen);
2357 /* Unmaps a memory region previously mapped by address_space_map().
2358 * Will also mark the memory as dirty if is_write == 1. access_len gives
2359 * the amount of memory that was actually read or written by the caller.
2361 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2362 int is_write, hwaddr access_len)
2364 if (buffer != bounce.buffer) {
2365 MemoryRegion *mr;
2366 ram_addr_t addr1;
2368 mr = qemu_ram_addr_from_host(buffer, &addr1);
2369 assert(mr != NULL);
2370 if (is_write) {
2371 invalidate_and_set_dirty(addr1, access_len);
2373 if (xen_enabled()) {
2374 xen_invalidate_map_cache_entry(buffer);
2376 memory_region_unref(mr);
2377 return;
2379 if (is_write) {
2380 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2382 qemu_vfree(bounce.buffer);
2383 bounce.buffer = NULL;
2384 memory_region_unref(bounce.mr);
2385 cpu_notify_map_clients();
2388 void *cpu_physical_memory_map(hwaddr addr,
2389 hwaddr *plen,
2390 int is_write)
2392 return address_space_map(&address_space_memory, addr, plen, is_write);
2395 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2396 int is_write, hwaddr access_len)
2398 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2401 /* warning: addr must be aligned */
2402 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2403 enum device_endian endian)
2405 uint8_t *ptr;
2406 uint64_t val;
2407 MemoryRegion *mr;
2408 hwaddr l = 4;
2409 hwaddr addr1;
2411 mr = address_space_translate(as, addr, &addr1, &l, false);
2412 if (l < 4 || !memory_access_is_direct(mr, false)) {
2413 /* I/O case */
2414 io_mem_read(mr, addr1, &val, 4);
2415 #if defined(TARGET_WORDS_BIGENDIAN)
2416 if (endian == DEVICE_LITTLE_ENDIAN) {
2417 val = bswap32(val);
2419 #else
2420 if (endian == DEVICE_BIG_ENDIAN) {
2421 val = bswap32(val);
2423 #endif
2424 } else {
2425 /* RAM case */
2426 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2427 & TARGET_PAGE_MASK)
2428 + addr1);
2429 switch (endian) {
2430 case DEVICE_LITTLE_ENDIAN:
2431 val = ldl_le_p(ptr);
2432 break;
2433 case DEVICE_BIG_ENDIAN:
2434 val = ldl_be_p(ptr);
2435 break;
2436 default:
2437 val = ldl_p(ptr);
2438 break;
2441 return val;
2444 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2446 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2449 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2451 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2454 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2456 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2459 /* warning: addr must be aligned */
2460 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2461 enum device_endian endian)
2463 uint8_t *ptr;
2464 uint64_t val;
2465 MemoryRegion *mr;
2466 hwaddr l = 8;
2467 hwaddr addr1;
2469 mr = address_space_translate(as, addr, &addr1, &l,
2470 false);
2471 if (l < 8 || !memory_access_is_direct(mr, false)) {
2472 /* I/O case */
2473 io_mem_read(mr, addr1, &val, 8);
2474 #if defined(TARGET_WORDS_BIGENDIAN)
2475 if (endian == DEVICE_LITTLE_ENDIAN) {
2476 val = bswap64(val);
2478 #else
2479 if (endian == DEVICE_BIG_ENDIAN) {
2480 val = bswap64(val);
2482 #endif
2483 } else {
2484 /* RAM case */
2485 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2486 & TARGET_PAGE_MASK)
2487 + addr1);
2488 switch (endian) {
2489 case DEVICE_LITTLE_ENDIAN:
2490 val = ldq_le_p(ptr);
2491 break;
2492 case DEVICE_BIG_ENDIAN:
2493 val = ldq_be_p(ptr);
2494 break;
2495 default:
2496 val = ldq_p(ptr);
2497 break;
2500 return val;
2503 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2505 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2508 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2510 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2513 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2515 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2518 /* XXX: optimize */
2519 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2521 uint8_t val;
2522 address_space_rw(as, addr, &val, 1, 0);
2523 return val;
2526 /* warning: addr must be aligned */
2527 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2528 enum device_endian endian)
2530 uint8_t *ptr;
2531 uint64_t val;
2532 MemoryRegion *mr;
2533 hwaddr l = 2;
2534 hwaddr addr1;
2536 mr = address_space_translate(as, addr, &addr1, &l,
2537 false);
2538 if (l < 2 || !memory_access_is_direct(mr, false)) {
2539 /* I/O case */
2540 io_mem_read(mr, addr1, &val, 2);
2541 #if defined(TARGET_WORDS_BIGENDIAN)
2542 if (endian == DEVICE_LITTLE_ENDIAN) {
2543 val = bswap16(val);
2545 #else
2546 if (endian == DEVICE_BIG_ENDIAN) {
2547 val = bswap16(val);
2549 #endif
2550 } else {
2551 /* RAM case */
2552 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2553 & TARGET_PAGE_MASK)
2554 + addr1);
2555 switch (endian) {
2556 case DEVICE_LITTLE_ENDIAN:
2557 val = lduw_le_p(ptr);
2558 break;
2559 case DEVICE_BIG_ENDIAN:
2560 val = lduw_be_p(ptr);
2561 break;
2562 default:
2563 val = lduw_p(ptr);
2564 break;
2567 return val;
2570 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2572 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2575 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2577 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2580 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2582 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2585 /* warning: addr must be aligned. The ram page is not masked as dirty
2586 and the code inside is not invalidated. It is useful if the dirty
2587 bits are used to track modified PTEs */
2588 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2590 uint8_t *ptr;
2591 MemoryRegion *mr;
2592 hwaddr l = 4;
2593 hwaddr addr1;
2595 mr = address_space_translate(as, addr, &addr1, &l,
2596 true);
2597 if (l < 4 || !memory_access_is_direct(mr, true)) {
2598 io_mem_write(mr, addr1, val, 4);
2599 } else {
2600 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2601 ptr = qemu_get_ram_ptr(addr1);
2602 stl_p(ptr, val);
2604 if (unlikely(in_migration)) {
2605 if (cpu_physical_memory_is_clean(addr1)) {
2606 /* invalidate code */
2607 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2608 /* set dirty bit */
2609 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2615 /* warning: addr must be aligned */
2616 static inline void stl_phys_internal(AddressSpace *as,
2617 hwaddr addr, uint32_t val,
2618 enum device_endian endian)
2620 uint8_t *ptr;
2621 MemoryRegion *mr;
2622 hwaddr l = 4;
2623 hwaddr addr1;
2625 mr = address_space_translate(as, addr, &addr1, &l,
2626 true);
2627 if (l < 4 || !memory_access_is_direct(mr, true)) {
2628 #if defined(TARGET_WORDS_BIGENDIAN)
2629 if (endian == DEVICE_LITTLE_ENDIAN) {
2630 val = bswap32(val);
2632 #else
2633 if (endian == DEVICE_BIG_ENDIAN) {
2634 val = bswap32(val);
2636 #endif
2637 io_mem_write(mr, addr1, val, 4);
2638 } else {
2639 /* RAM case */
2640 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2641 ptr = qemu_get_ram_ptr(addr1);
2642 switch (endian) {
2643 case DEVICE_LITTLE_ENDIAN:
2644 stl_le_p(ptr, val);
2645 break;
2646 case DEVICE_BIG_ENDIAN:
2647 stl_be_p(ptr, val);
2648 break;
2649 default:
2650 stl_p(ptr, val);
2651 break;
2653 invalidate_and_set_dirty(addr1, 4);
2657 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2659 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2662 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2664 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2667 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2669 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2672 /* XXX: optimize */
2673 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2675 uint8_t v = val;
2676 address_space_rw(as, addr, &v, 1, 1);
2679 /* warning: addr must be aligned */
2680 static inline void stw_phys_internal(AddressSpace *as,
2681 hwaddr addr, uint32_t val,
2682 enum device_endian endian)
2684 uint8_t *ptr;
2685 MemoryRegion *mr;
2686 hwaddr l = 2;
2687 hwaddr addr1;
2689 mr = address_space_translate(as, addr, &addr1, &l, true);
2690 if (l < 2 || !memory_access_is_direct(mr, true)) {
2691 #if defined(TARGET_WORDS_BIGENDIAN)
2692 if (endian == DEVICE_LITTLE_ENDIAN) {
2693 val = bswap16(val);
2695 #else
2696 if (endian == DEVICE_BIG_ENDIAN) {
2697 val = bswap16(val);
2699 #endif
2700 io_mem_write(mr, addr1, val, 2);
2701 } else {
2702 /* RAM case */
2703 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2704 ptr = qemu_get_ram_ptr(addr1);
2705 switch (endian) {
2706 case DEVICE_LITTLE_ENDIAN:
2707 stw_le_p(ptr, val);
2708 break;
2709 case DEVICE_BIG_ENDIAN:
2710 stw_be_p(ptr, val);
2711 break;
2712 default:
2713 stw_p(ptr, val);
2714 break;
2716 invalidate_and_set_dirty(addr1, 2);
2720 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2722 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2725 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2727 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2730 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2732 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2735 /* XXX: optimize */
2736 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2738 val = tswap64(val);
2739 address_space_rw(as, addr, (void *) &val, 8, 1);
2742 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2744 val = cpu_to_le64(val);
2745 address_space_rw(as, addr, (void *) &val, 8, 1);
2748 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2750 val = cpu_to_be64(val);
2751 address_space_rw(as, addr, (void *) &val, 8, 1);
2754 /* virtual memory access for debug (includes writing to ROM) */
2755 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2756 uint8_t *buf, int len, int is_write)
2758 int l;
2759 hwaddr phys_addr;
2760 target_ulong page;
2762 while (len > 0) {
2763 page = addr & TARGET_PAGE_MASK;
2764 phys_addr = cpu_get_phys_page_debug(cpu, page);
2765 /* if no physical page mapped, return an error */
2766 if (phys_addr == -1)
2767 return -1;
2768 l = (page + TARGET_PAGE_SIZE) - addr;
2769 if (l > len)
2770 l = len;
2771 phys_addr += (addr & ~TARGET_PAGE_MASK);
2772 if (is_write) {
2773 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2774 } else {
2775 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2777 len -= l;
2778 buf += l;
2779 addr += l;
2781 return 0;
2783 #endif
2786 * A helper function for the _utterly broken_ virtio device model to find out if
2787 * it's running on a big endian machine. Don't do this at home kids!
2789 bool target_words_bigendian(void);
2790 bool target_words_bigendian(void)
2792 #if defined(TARGET_WORDS_BIGENDIAN)
2793 return true;
2794 #else
2795 return false;
2796 #endif
2799 #ifndef CONFIG_USER_ONLY
2800 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2802 MemoryRegion*mr;
2803 hwaddr l = 1;
2805 mr = address_space_translate(&address_space_memory,
2806 phys_addr, &phys_addr, &l, false);
2808 return !(memory_region_is_ram(mr) ||
2809 memory_region_is_romd(mr));
2812 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2814 RAMBlock *block;
2816 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2817 func(block->host, block->offset, block->length, opaque);
2820 #endif