Merge remote-tracking branch 'remotes/rth/tags/x86-next-20141214' into staging
[qemu-kvm.git] / exec.c
blob71ac104b391486d23c17db9659dc91a3f83f2c43
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
78 #endif
80 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
81 /* current CPU in the current thread. It is only valid inside
82 cpu_exec() */
83 DEFINE_TLS(CPUState *, current_cpu);
84 /* 0 = Do not count executed instructions.
85 1 = Precise instruction counting.
86 2 = Adaptive rate instruction counting. */
87 int use_icount;
89 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the physical page map, packed into 32 bits. */
struct PhysPageEntry {
    /* Number of levels to skip to reach the next one (in units of L2_SIZE);
     * 0 marks a leaf.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Interior (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
100 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
102 /* Size of the L2 (and L3, etc) page tables. */
103 #define ADDR_SPACE_BITS 64
105 #define P_L2_BITS 9
106 #define P_L2_SIZE (1 << P_L2_BITS)
108 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
110 typedef PhysPageEntry Node[P_L2_SIZE];
112 typedef struct PhysPageMap {
113 unsigned sections_nb;
114 unsigned sections_nb_alloc;
115 unsigned nodes_nb;
116 unsigned nodes_nb_alloc;
117 Node *nodes;
118 MemoryRegionSection *sections;
119 } PhysPageMap;
121 struct AddressSpaceDispatch {
122 /* This is a multi-level map on the physical address space.
123 * The bottom level has pointers to MemoryRegionSections.
125 PhysPageEntry phys_map;
126 PhysPageMap map;
127 AddressSpace *as;
130 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
131 typedef struct subpage_t {
132 MemoryRegion iomem;
133 AddressSpace *as;
134 hwaddr base;
135 uint16_t sub_section[TARGET_PAGE_SIZE];
136 } subpage_t;
138 #define PHYS_SECTION_UNASSIGNED 0
139 #define PHYS_SECTION_NOTDIRTY 1
140 #define PHYS_SECTION_ROM 2
141 #define PHYS_SECTION_WATCH 3
143 static void io_mem_init(void);
144 static void memory_map_init(void);
145 static void tcg_commit(MemoryListener *listener);
147 static MemoryRegion io_mem_watch;
148 #endif
150 #if !defined(CONFIG_USER_ONLY)
152 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
154 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
155 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
156 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
157 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
161 static uint32_t phys_map_node_alloc(PhysPageMap *map)
163 unsigned i;
164 uint32_t ret;
166 ret = map->nodes_nb++;
167 assert(ret != PHYS_MAP_NODE_NIL);
168 assert(ret != map->nodes_nb_alloc);
169 for (i = 0; i < P_L2_SIZE; ++i) {
170 map->nodes[ret][i].skip = 1;
171 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
173 return ret;
176 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
177 hwaddr *index, hwaddr *nb, uint16_t leaf,
178 int level)
180 PhysPageEntry *p;
181 int i;
182 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
184 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
185 lp->ptr = phys_map_node_alloc(map);
186 p = map->nodes[lp->ptr];
187 if (level == 0) {
188 for (i = 0; i < P_L2_SIZE; i++) {
189 p[i].skip = 0;
190 p[i].ptr = PHYS_SECTION_UNASSIGNED;
193 } else {
194 p = map->nodes[lp->ptr];
196 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
198 while (*nb && lp < &p[P_L2_SIZE]) {
199 if ((*index & (step - 1)) == 0 && *nb >= step) {
200 lp->skip = 0;
201 lp->ptr = leaf;
202 *index += step;
203 *nb -= step;
204 } else {
205 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
207 ++lp;
211 static void phys_page_set(AddressSpaceDispatch *d,
212 hwaddr index, hwaddr nb,
213 uint16_t leaf)
215 /* Wildly overreserve - it doesn't matter much. */
216 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
218 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
221 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
222 * and update our entry so we can skip it and go directly to the destination.
224 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
226 unsigned valid_ptr = P_L2_SIZE;
227 int valid = 0;
228 PhysPageEntry *p;
229 int i;
231 if (lp->ptr == PHYS_MAP_NODE_NIL) {
232 return;
235 p = nodes[lp->ptr];
236 for (i = 0; i < P_L2_SIZE; i++) {
237 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
238 continue;
241 valid_ptr = i;
242 valid++;
243 if (p[i].skip) {
244 phys_page_compact(&p[i], nodes, compacted);
248 /* We can only compress if there's only one child. */
249 if (valid != 1) {
250 return;
253 assert(valid_ptr < P_L2_SIZE);
255 /* Don't compress if it won't fit in the # of bits we have. */
256 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
257 return;
260 lp->ptr = p[valid_ptr].ptr;
261 if (!p[valid_ptr].skip) {
262 /* If our only child is a leaf, make this a leaf. */
263 /* By design, we should have made this node a leaf to begin with so we
264 * should never reach here.
265 * But since it's so simple to handle this, let's do it just in case we
266 * change this rule.
268 lp->skip = 0;
269 } else {
270 lp->skip += p[valid_ptr].skip;
274 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
276 DECLARE_BITMAP(compacted, nodes_nb);
278 if (d->phys_map.skip) {
279 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
283 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
284 Node *nodes, MemoryRegionSection *sections)
286 PhysPageEntry *p;
287 hwaddr index = addr >> TARGET_PAGE_BITS;
288 int i;
290 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
291 if (lp.ptr == PHYS_MAP_NODE_NIL) {
292 return &sections[PHYS_SECTION_UNASSIGNED];
294 p = nodes[lp.ptr];
295 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
298 if (sections[lp.ptr].size.hi ||
299 range_covers_byte(sections[lp.ptr].offset_within_address_space,
300 sections[lp.ptr].size.lo, addr)) {
301 return &sections[lp.ptr];
302 } else {
303 return &sections[PHYS_SECTION_UNASSIGNED];
307 bool memory_region_is_unassigned(MemoryRegion *mr)
309 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
310 && mr != &io_mem_watch;
313 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
314 hwaddr addr,
315 bool resolve_subpage)
317 MemoryRegionSection *section;
318 subpage_t *subpage;
320 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
321 if (resolve_subpage && section->mr->subpage) {
322 subpage = container_of(section->mr, subpage_t, iomem);
323 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
325 return section;
328 static MemoryRegionSection *
329 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
330 hwaddr *plen, bool resolve_subpage)
332 MemoryRegionSection *section;
333 Int128 diff;
335 section = address_space_lookup_region(d, addr, resolve_subpage);
336 /* Compute offset within MemoryRegionSection */
337 addr -= section->offset_within_address_space;
339 /* Compute offset within MemoryRegion */
340 *xlat = addr + section->offset_within_region;
342 diff = int128_sub(section->mr->size, int128_make64(addr));
343 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
344 return section;
347 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
349 if (memory_region_is_ram(mr)) {
350 return !(is_write && mr->readonly);
352 if (memory_region_is_romd(mr)) {
353 return !is_write;
356 return false;
359 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
360 hwaddr *xlat, hwaddr *plen,
361 bool is_write)
363 IOMMUTLBEntry iotlb;
364 MemoryRegionSection *section;
365 MemoryRegion *mr;
366 hwaddr len = *plen;
368 for (;;) {
369 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
370 mr = section->mr;
372 if (!mr->iommu_ops) {
373 break;
376 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
377 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
378 | (addr & iotlb.addr_mask));
379 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
380 if (!(iotlb.perm & (1 << is_write))) {
381 mr = &io_mem_unassigned;
382 break;
385 as = iotlb.target_as;
388 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
389 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
390 len = MIN(page, len);
393 *plen = len;
394 *xlat = addr;
395 return mr;
398 MemoryRegionSection *
399 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
400 hwaddr *plen)
402 MemoryRegionSection *section;
403 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
405 assert(!section->mr->iommu_ops);
406 return section;
408 #endif
410 void cpu_exec_init_all(void)
412 #if !defined(CONFIG_USER_ONLY)
413 qemu_mutex_init(&ram_list.mutex);
414 memory_map_init();
415 io_mem_init();
416 #endif
419 #if !defined(CONFIG_USER_ONLY)
421 static int cpu_common_post_load(void *opaque, int version_id)
423 CPUState *cpu = opaque;
425 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
426 version_id is increased. */
427 cpu->interrupt_request &= ~0x01;
428 tlb_flush(cpu, 1);
430 return 0;
433 static int cpu_common_pre_load(void *opaque)
435 CPUState *cpu = opaque;
437 cpu->exception_index = 0;
439 return 0;
442 static bool cpu_common_exception_index_needed(void *opaque)
444 CPUState *cpu = opaque;
446 return cpu->exception_index != 0;
449 static const VMStateDescription vmstate_cpu_common_exception_index = {
450 .name = "cpu_common/exception_index",
451 .version_id = 1,
452 .minimum_version_id = 1,
453 .fields = (VMStateField[]) {
454 VMSTATE_INT32(exception_index, CPUState),
455 VMSTATE_END_OF_LIST()
459 const VMStateDescription vmstate_cpu_common = {
460 .name = "cpu_common",
461 .version_id = 1,
462 .minimum_version_id = 1,
463 .pre_load = cpu_common_pre_load,
464 .post_load = cpu_common_post_load,
465 .fields = (VMStateField[]) {
466 VMSTATE_UINT32(halted, CPUState),
467 VMSTATE_UINT32(interrupt_request, CPUState),
468 VMSTATE_END_OF_LIST()
470 .subsections = (VMStateSubsection[]) {
472 .vmsd = &vmstate_cpu_common_exception_index,
473 .needed = cpu_common_exception_index_needed,
474 } , {
475 /* empty */
480 #endif
482 CPUState *qemu_get_cpu(int index)
484 CPUState *cpu;
486 CPU_FOREACH(cpu) {
487 if (cpu->cpu_index == index) {
488 return cpu;
492 return NULL;
495 #if !defined(CONFIG_USER_ONLY)
496 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
498 /* We only support one address space per cpu at the moment. */
499 assert(cpu->as == as);
501 if (cpu->tcg_as_listener) {
502 memory_listener_unregister(cpu->tcg_as_listener);
503 } else {
504 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
506 cpu->tcg_as_listener->commit = tcg_commit;
507 memory_listener_register(cpu->tcg_as_listener, as);
509 #endif
511 void cpu_exec_init(CPUArchState *env)
513 CPUState *cpu = ENV_GET_CPU(env);
514 CPUClass *cc = CPU_GET_CLASS(cpu);
515 CPUState *some_cpu;
516 int cpu_index;
518 #if defined(CONFIG_USER_ONLY)
519 cpu_list_lock();
520 #endif
521 cpu_index = 0;
522 CPU_FOREACH(some_cpu) {
523 cpu_index++;
525 cpu->cpu_index = cpu_index;
526 cpu->numa_node = 0;
527 QTAILQ_INIT(&cpu->breakpoints);
528 QTAILQ_INIT(&cpu->watchpoints);
529 #ifndef CONFIG_USER_ONLY
530 cpu->as = &address_space_memory;
531 cpu->thread_id = qemu_get_thread_id();
532 #endif
533 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
534 #if defined(CONFIG_USER_ONLY)
535 cpu_list_unlock();
536 #endif
537 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
538 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
540 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
541 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
542 cpu_save, cpu_load, env);
543 assert(cc->vmsd == NULL);
544 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
545 #endif
546 if (cc->vmsd != NULL) {
547 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate translated code covering the byte at 'pc'. */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/* Invalidate translated code at the physical address backing 'pc';
 * nothing is done when the debug lookup returns -1.
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);

    if (phys == -1) {
        return;
    }
    tb_invalidate_phys_addr(cpu->as,
                            phys | (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */
569 #if defined(CONFIG_USER_ONLY)
570 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
575 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
576 int flags)
578 return -ENOSYS;
581 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
585 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
586 int flags, CPUWatchpoint **watchpoint)
588 return -ENOSYS;
590 #else
591 /* Add a watchpoint. */
592 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
593 int flags, CPUWatchpoint **watchpoint)
595 CPUWatchpoint *wp;
597 /* forbid ranges which are empty or run off the end of the address space */
598 if (len == 0 || (addr + len - 1) < addr) {
599 error_report("tried to set invalid watchpoint at %"
600 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
601 return -EINVAL;
603 wp = g_malloc(sizeof(*wp));
605 wp->vaddr = addr;
606 wp->len = len;
607 wp->flags = flags;
609 /* keep all GDB-injected watchpoints in front */
610 if (flags & BP_GDB) {
611 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
612 } else {
613 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
616 tlb_flush_page(cpu, addr);
618 if (watchpoint)
619 *watchpoint = wp;
620 return 0;
623 /* Remove a specific watchpoint. */
624 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
625 int flags)
627 CPUWatchpoint *wp;
629 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
630 if (addr == wp->vaddr && len == wp->len
631 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
632 cpu_watchpoint_remove_by_ref(cpu, wp);
633 return 0;
636 return -ENOENT;
639 /* Remove a specific watchpoint by reference. */
640 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
642 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
644 tlb_flush_page(cpu, watchpoint->vaddr);
646 g_free(watchpoint);
649 /* Remove all matching watchpoints. */
650 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
652 CPUWatchpoint *wp, *next;
654 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
655 if (wp->flags & mask) {
656 cpu_watchpoint_remove_by_ref(cpu, wp);
661 /* Return true if this watchpoint address matches the specified
662 * access (ie the address range covered by the watchpoint overlaps
663 * partially or completely with the address range covered by the
664 * access).
666 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
667 vaddr addr,
668 vaddr len)
670 /* We know the lengths are non-zero, but a little caution is
671 * required to avoid errors in the case where the range ends
672 * exactly at the top of the address space and so addr + len
673 * wraps round to zero.
675 vaddr wpend = wp->vaddr + wp->len - 1;
676 vaddr addrend = addr + len - 1;
678 return !(addr > wpend || wp->vaddr > addrend);
681 #endif
683 /* Add a breakpoint. */
684 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
685 CPUBreakpoint **breakpoint)
687 #if defined(TARGET_HAS_ICE)
688 CPUBreakpoint *bp;
690 bp = g_malloc(sizeof(*bp));
692 bp->pc = pc;
693 bp->flags = flags;
695 /* keep all GDB-injected breakpoints in front */
696 if (flags & BP_GDB) {
697 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
698 } else {
699 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
702 breakpoint_invalidate(cpu, pc);
704 if (breakpoint) {
705 *breakpoint = bp;
707 return 0;
708 #else
709 return -ENOSYS;
710 #endif
713 /* Remove a specific breakpoint. */
714 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
716 #if defined(TARGET_HAS_ICE)
717 CPUBreakpoint *bp;
719 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
720 if (bp->pc == pc && bp->flags == flags) {
721 cpu_breakpoint_remove_by_ref(cpu, bp);
722 return 0;
725 return -ENOENT;
726 #else
727 return -ENOSYS;
728 #endif
731 /* Remove a specific breakpoint by reference. */
732 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
734 #if defined(TARGET_HAS_ICE)
735 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
737 breakpoint_invalidate(cpu, breakpoint->pc);
739 g_free(breakpoint);
740 #endif
743 /* Remove all matching breakpoints. */
744 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
746 #if defined(TARGET_HAS_ICE)
747 CPUBreakpoint *bp, *next;
749 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
750 if (bp->flags & mask) {
751 cpu_breakpoint_remove_by_ref(cpu, bp);
754 #endif
757 /* enable or disable single step mode. EXCP_DEBUG is returned by the
758 CPU loop after each instruction */
759 void cpu_single_step(CPUState *cpu, int enabled)
761 #if defined(TARGET_HAS_ICE)
762 if (cpu->singlestep_enabled != enabled) {
763 cpu->singlestep_enabled = enabled;
764 if (kvm_enabled()) {
765 kvm_update_guest_debug(cpu, 0);
766 } else {
767 /* must flush all the translated code to avoid inconsistencies */
768 /* XXX: only flush what is necessary */
769 CPUArchState *env = cpu->env_ptr;
770 tb_flush(env);
773 #endif
776 void cpu_abort(CPUState *cpu, const char *fmt, ...)
778 va_list ap;
779 va_list ap2;
781 va_start(ap, fmt);
782 va_copy(ap2, ap);
783 fprintf(stderr, "qemu: fatal: ");
784 vfprintf(stderr, fmt, ap);
785 fprintf(stderr, "\n");
786 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
787 if (qemu_log_enabled()) {
788 qemu_log("qemu: fatal: ");
789 qemu_log_vprintf(fmt, ap2);
790 qemu_log("\n");
791 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
792 qemu_log_flush();
793 qemu_log_close();
795 va_end(ap2);
796 va_end(ap);
797 #if defined(CONFIG_USER_ONLY)
799 struct sigaction act;
800 sigfillset(&act.sa_mask);
801 act.sa_handler = SIG_DFL;
802 sigaction(SIGABRT, &act, NULL);
804 #endif
805 abort();
808 #if !defined(CONFIG_USER_ONLY)
809 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
811 RAMBlock *block;
813 /* The list is protected by the iothread lock here. */
814 block = ram_list.mru_block;
815 if (block && addr - block->offset < block->length) {
816 goto found;
818 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
819 if (addr - block->offset < block->length) {
820 goto found;
824 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
825 abort();
827 found:
828 ram_list.mru_block = block;
829 return block;
832 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
834 ram_addr_t start1;
835 RAMBlock *block;
836 ram_addr_t end;
838 end = TARGET_PAGE_ALIGN(start + length);
839 start &= TARGET_PAGE_MASK;
841 block = qemu_get_ram_block(start);
842 assert(block == qemu_get_ram_block(end - 1));
843 start1 = (uintptr_t)block->host + (start - block->offset);
844 cpu_tlb_reset_dirty_all(start1, length);
847 /* Note: start and end must be within the same ram block. */
848 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
849 unsigned client)
851 if (length == 0)
852 return;
853 cpu_physical_memory_clear_dirty_range(start, length, client);
855 if (tcg_enabled()) {
856 tlb_reset_dirty_range_all(start, length);
860 static void cpu_physical_memory_set_dirty_tracking(bool enable)
862 in_migration = enable;
865 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
866 MemoryRegionSection *section,
867 target_ulong vaddr,
868 hwaddr paddr, hwaddr xlat,
869 int prot,
870 target_ulong *address)
872 hwaddr iotlb;
873 CPUWatchpoint *wp;
875 if (memory_region_is_ram(section->mr)) {
876 /* Normal RAM. */
877 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
878 + xlat;
879 if (!section->readonly) {
880 iotlb |= PHYS_SECTION_NOTDIRTY;
881 } else {
882 iotlb |= PHYS_SECTION_ROM;
884 } else {
885 iotlb = section - section->address_space->dispatch->map.sections;
886 iotlb += xlat;
889 /* Make accesses to pages with watchpoints go via the
890 watchpoint trap routines. */
891 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
892 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
893 /* Avoid trapping reads of pages with a write breakpoint. */
894 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
895 iotlb = PHYS_SECTION_WATCH + paddr;
896 *address |= TLB_MMIO;
897 break;
902 return iotlb;
904 #endif /* defined(CONFIG_USER_ONLY) */
906 #if !defined(CONFIG_USER_ONLY)
908 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
909 uint16_t section);
910 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
912 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
913 qemu_anon_ram_alloc;
916 * Set a custom physical guest memory alloator.
917 * Accelerators with unusual needs may need this. Hopefully, we can
918 * get rid of it eventually.
920 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
922 phys_mem_alloc = alloc;
925 static uint16_t phys_section_add(PhysPageMap *map,
926 MemoryRegionSection *section)
928 /* The physical section number is ORed with a page-aligned
929 * pointer to produce the iotlb entries. Thus it should
930 * never overflow into the page-aligned value.
932 assert(map->sections_nb < TARGET_PAGE_SIZE);
934 if (map->sections_nb == map->sections_nb_alloc) {
935 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
936 map->sections = g_renew(MemoryRegionSection, map->sections,
937 map->sections_nb_alloc);
939 map->sections[map->sections_nb] = *section;
940 memory_region_ref(section->mr);
941 return map->sections_nb++;
944 static void phys_section_destroy(MemoryRegion *mr)
946 memory_region_unref(mr);
948 if (mr->subpage) {
949 subpage_t *subpage = container_of(mr, subpage_t, iomem);
950 object_unref(OBJECT(&subpage->iomem));
951 g_free(subpage);
955 static void phys_sections_free(PhysPageMap *map)
957 while (map->sections_nb > 0) {
958 MemoryRegionSection *section = &map->sections[--map->sections_nb];
959 phys_section_destroy(section->mr);
961 g_free(map->sections);
962 g_free(map->nodes);
965 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
967 subpage_t *subpage;
968 hwaddr base = section->offset_within_address_space
969 & TARGET_PAGE_MASK;
970 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
971 d->map.nodes, d->map.sections);
972 MemoryRegionSection subsection = {
973 .offset_within_address_space = base,
974 .size = int128_make64(TARGET_PAGE_SIZE),
976 hwaddr start, end;
978 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
980 if (!(existing->mr->subpage)) {
981 subpage = subpage_init(d->as, base);
982 subsection.address_space = d->as;
983 subsection.mr = &subpage->iomem;
984 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
985 phys_section_add(&d->map, &subsection));
986 } else {
987 subpage = container_of(existing->mr, subpage_t, iomem);
989 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
990 end = start + int128_get64(section->size) - 1;
991 subpage_register(subpage, start, end,
992 phys_section_add(&d->map, section));
996 static void register_multipage(AddressSpaceDispatch *d,
997 MemoryRegionSection *section)
999 hwaddr start_addr = section->offset_within_address_space;
1000 uint16_t section_index = phys_section_add(&d->map, section);
1001 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1002 TARGET_PAGE_BITS));
1004 assert(num_pages);
1005 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1008 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1010 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1011 AddressSpaceDispatch *d = as->next_dispatch;
1012 MemoryRegionSection now = *section, remain = *section;
1013 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1015 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1016 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1017 - now.offset_within_address_space;
1019 now.size = int128_min(int128_make64(left), now.size);
1020 register_subpage(d, &now);
1021 } else {
1022 now.size = int128_zero();
1024 while (int128_ne(remain.size, now.size)) {
1025 remain.size = int128_sub(remain.size, now.size);
1026 remain.offset_within_address_space += int128_get64(now.size);
1027 remain.offset_within_region += int128_get64(now.size);
1028 now = remain;
1029 if (int128_lt(remain.size, page_size)) {
1030 register_subpage(d, &now);
1031 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1032 now.size = page_size;
1033 register_subpage(d, &now);
1034 } else {
1035 now.size = int128_and(now.size, int128_neg(page_size));
1036 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; only has an effect under KVM. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1047 void qemu_mutex_lock_ramlist(void)
1049 qemu_mutex_lock(&ram_list.mutex);
1052 void qemu_mutex_unlock_ramlist(void)
1054 qemu_mutex_unlock(&ram_list.mutex);
1057 #ifdef __linux__
1059 #include <sys/vfs.h>
1061 #define HUGETLBFS_MAGIC 0x958458f6
1063 static long gethugepagesize(const char *path, Error **errp)
1065 struct statfs fs;
1066 int ret;
1068 do {
1069 ret = statfs(path, &fs);
1070 } while (ret != 0 && errno == EINTR);
1072 if (ret != 0) {
1073 error_setg_errno(errp, errno, "failed to get page size of file %s",
1074 path);
1075 return 0;
1078 if (fs.f_type != HUGETLBFS_MAGIC)
1079 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1081 return fs.f_bsize;
1084 static void *file_ram_alloc(RAMBlock *block,
1085 ram_addr_t memory,
1086 const char *path,
1087 Error **errp)
1089 char *filename;
1090 char *sanitized_name;
1091 char *c;
1092 void *area = NULL;
1093 int fd;
1094 uint64_t hpagesize;
1095 Error *local_err = NULL;
1097 hpagesize = gethugepagesize(path, &local_err);
1098 if (local_err) {
1099 error_propagate(errp, local_err);
1100 goto error;
1102 block->mr->align = hpagesize;
1104 if (memory < hpagesize) {
1105 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1106 "or larger than huge page size 0x%" PRIx64,
1107 memory, hpagesize);
1108 goto error;
1111 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1112 error_setg(errp,
1113 "host lacks kvm mmu notifiers, -mem-path unsupported");
1114 goto error;
1117 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1118 sanitized_name = g_strdup(memory_region_name(block->mr));
1119 for (c = sanitized_name; *c != '\0'; c++) {
1120 if (*c == '/')
1121 *c = '_';
1124 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1125 sanitized_name);
1126 g_free(sanitized_name);
1128 fd = mkstemp(filename);
1129 if (fd < 0) {
1130 error_setg_errno(errp, errno,
1131 "unable to create backing store for hugepages");
1132 g_free(filename);
1133 goto error;
1135 unlink(filename);
1136 g_free(filename);
1138 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1141 * ftruncate is not supported by hugetlbfs in older
1142 * hosts, so don't bother bailing out on errors.
1143 * If anything goes wrong with it under other filesystems,
1144 * mmap will fail.
1146 if (ftruncate(fd, memory)) {
1147 perror("ftruncate");
1150 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1151 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1152 fd, 0);
1153 if (area == MAP_FAILED) {
1154 error_setg_errno(errp, errno,
1155 "unable to map backing store for hugepages");
1156 close(fd);
1157 goto error;
1160 if (mem_prealloc) {
1161 os_mem_prealloc(fd, area, memory);
1164 block->fd = fd;
1165 return area;
1167 error:
1168 if (mem_prealloc) {
1169 error_report("%s\n", error_get_pretty(*errp));
1170 exit(1);
1172 return NULL;
1174 #endif
1176 static ram_addr_t find_ram_offset(ram_addr_t size)
1178 RAMBlock *block, *next_block;
1179 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1181 assert(size != 0); /* it would hand out same offset multiple times */
1183 if (QTAILQ_EMPTY(&ram_list.blocks))
1184 return 0;
1186 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1187 ram_addr_t end, next = RAM_ADDR_MAX;
1189 end = block->offset + block->length;
1191 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1192 if (next_block->offset >= end) {
1193 next = MIN(next, next_block->offset);
1196 if (next - end >= size && next - end < mingap) {
1197 offset = end;
1198 mingap = next - end;
1202 if (offset == RAM_ADDR_MAX) {
1203 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1204 (uint64_t)size);
1205 abort();
1208 return offset;
1211 ram_addr_t last_ram_offset(void)
1213 RAMBlock *block;
1214 ram_addr_t last = 0;
1216 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1217 last = MAX(last, block->offset + block->length);
1219 return last;
1222 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1224 int ret;
1226 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1227 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1228 "dump-guest-core", true)) {
1229 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1230 if (ret) {
1231 perror("qemu_madvise");
1232 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1233 "but dump_guest_core=off specified\n");
1238 static RAMBlock *find_ram_block(ram_addr_t addr)
1240 RAMBlock *block;
1242 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1243 if (block->offset == addr) {
1244 return block;
1248 return NULL;
1251 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1253 RAMBlock *new_block = find_ram_block(addr);
1254 RAMBlock *block;
1256 assert(new_block);
1257 assert(!new_block->idstr[0]);
1259 if (dev) {
1260 char *id = qdev_get_dev_path(dev);
1261 if (id) {
1262 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1263 g_free(id);
1266 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1268 /* This assumes the iothread lock is taken here too. */
1269 qemu_mutex_lock_ramlist();
1270 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1271 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1272 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1273 new_block->idstr);
1274 abort();
1277 qemu_mutex_unlock_ramlist();
1280 void qemu_ram_unset_idstr(ram_addr_t addr)
1282 RAMBlock *block = find_ram_block(addr);
1284 if (block) {
1285 memset(block->idstr, 0, sizeof(block->idstr));
1289 static int memory_try_enable_merging(void *addr, size_t len)
1291 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1292 /* disabled by the user */
1293 return 0;
1296 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1299 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1301 RAMBlock *block;
1302 ram_addr_t old_ram_size, new_ram_size;
1304 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1306 /* This assumes the iothread lock is taken here too. */
1307 qemu_mutex_lock_ramlist();
1308 new_block->offset = find_ram_offset(new_block->length);
1310 if (!new_block->host) {
1311 if (xen_enabled()) {
1312 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1313 } else {
1314 new_block->host = phys_mem_alloc(new_block->length,
1315 &new_block->mr->align);
1316 if (!new_block->host) {
1317 error_setg_errno(errp, errno,
1318 "cannot set up guest memory '%s'",
1319 memory_region_name(new_block->mr));
1320 qemu_mutex_unlock_ramlist();
1321 return -1;
1323 memory_try_enable_merging(new_block->host, new_block->length);
1327 /* Keep the list sorted from biggest to smallest block. */
1328 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1329 if (block->length < new_block->length) {
1330 break;
1333 if (block) {
1334 QTAILQ_INSERT_BEFORE(block, new_block, next);
1335 } else {
1336 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1338 ram_list.mru_block = NULL;
1340 ram_list.version++;
1341 qemu_mutex_unlock_ramlist();
1343 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1345 if (new_ram_size > old_ram_size) {
1346 int i;
1347 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1348 ram_list.dirty_memory[i] =
1349 bitmap_zero_extend(ram_list.dirty_memory[i],
1350 old_ram_size, new_ram_size);
1353 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1355 qemu_ram_setup_dump(new_block->host, new_block->length);
1356 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1357 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1359 if (kvm_enabled()) {
1360 kvm_setup_guest_memory(new_block->host, new_block->length);
1363 return new_block->offset;
1366 #ifdef __linux__
1367 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1368 bool share, const char *mem_path,
1369 Error **errp)
1371 RAMBlock *new_block;
1372 ram_addr_t addr;
1373 Error *local_err = NULL;
1375 if (xen_enabled()) {
1376 error_setg(errp, "-mem-path not supported with Xen");
1377 return -1;
1380 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1382 * file_ram_alloc() needs to allocate just like
1383 * phys_mem_alloc, but we haven't bothered to provide
1384 * a hook there.
1386 error_setg(errp,
1387 "-mem-path not supported with this accelerator");
1388 return -1;
1391 size = TARGET_PAGE_ALIGN(size);
1392 new_block = g_malloc0(sizeof(*new_block));
1393 new_block->mr = mr;
1394 new_block->length = size;
1395 new_block->flags = share ? RAM_SHARED : 0;
1396 new_block->host = file_ram_alloc(new_block, size,
1397 mem_path, errp);
1398 if (!new_block->host) {
1399 g_free(new_block);
1400 return -1;
1403 addr = ram_block_add(new_block, &local_err);
1404 if (local_err) {
1405 g_free(new_block);
1406 error_propagate(errp, local_err);
1407 return -1;
1409 return addr;
1411 #endif
1413 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1414 MemoryRegion *mr, Error **errp)
1416 RAMBlock *new_block;
1417 ram_addr_t addr;
1418 Error *local_err = NULL;
1420 size = TARGET_PAGE_ALIGN(size);
1421 new_block = g_malloc0(sizeof(*new_block));
1422 new_block->mr = mr;
1423 new_block->length = size;
1424 new_block->fd = -1;
1425 new_block->host = host;
1426 if (host) {
1427 new_block->flags |= RAM_PREALLOC;
1429 addr = ram_block_add(new_block, &local_err);
1430 if (local_err) {
1431 g_free(new_block);
1432 error_propagate(errp, local_err);
1433 return -1;
1435 return addr;
1438 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1440 return qemu_ram_alloc_from_ptr(size, NULL, mr, errp);
1443 void qemu_ram_free_from_ptr(ram_addr_t addr)
1445 RAMBlock *block;
1447 /* This assumes the iothread lock is taken here too. */
1448 qemu_mutex_lock_ramlist();
1449 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1450 if (addr == block->offset) {
1451 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1452 ram_list.mru_block = NULL;
1453 ram_list.version++;
1454 g_free(block);
1455 break;
1458 qemu_mutex_unlock_ramlist();
1461 void qemu_ram_free(ram_addr_t addr)
1463 RAMBlock *block;
1465 /* This assumes the iothread lock is taken here too. */
1466 qemu_mutex_lock_ramlist();
1467 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1468 if (addr == block->offset) {
1469 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1470 ram_list.mru_block = NULL;
1471 ram_list.version++;
1472 if (block->flags & RAM_PREALLOC) {
1474 } else if (xen_enabled()) {
1475 xen_invalidate_map_cache_entry(block->host);
1476 #ifndef _WIN32
1477 } else if (block->fd >= 0) {
1478 munmap(block->host, block->length);
1479 close(block->fd);
1480 #endif
1481 } else {
1482 qemu_anon_ram_free(block->host, block->length);
1484 g_free(block);
1485 break;
1488 qemu_mutex_unlock_ramlist();
1492 #ifndef _WIN32
1493 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1495 RAMBlock *block;
1496 ram_addr_t offset;
1497 int flags;
1498 void *area, *vaddr;
1500 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1501 offset = addr - block->offset;
1502 if (offset < block->length) {
1503 vaddr = block->host + offset;
1504 if (block->flags & RAM_PREALLOC) {
1506 } else if (xen_enabled()) {
1507 abort();
1508 } else {
1509 flags = MAP_FIXED;
1510 munmap(vaddr, length);
1511 if (block->fd >= 0) {
1512 flags |= (block->flags & RAM_SHARED ?
1513 MAP_SHARED : MAP_PRIVATE);
1514 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1515 flags, block->fd, offset);
1516 } else {
1518 * Remap needs to match alloc. Accelerators that
1519 * set phys_mem_alloc never remap. If they did,
1520 * we'd need a remap hook here.
1522 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1524 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1525 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1526 flags, -1, 0);
1528 if (area != vaddr) {
1529 fprintf(stderr, "Could not remap addr: "
1530 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1531 length, addr);
1532 exit(1);
1534 memory_try_enable_merging(vaddr, length);
1535 qemu_ram_setup_dump(vaddr, length);
1537 return;
1541 #endif /* !_WIN32 */
1543 int qemu_get_ram_fd(ram_addr_t addr)
1545 RAMBlock *block = qemu_get_ram_block(addr);
1547 return block->fd;
1550 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1552 RAMBlock *block = qemu_get_ram_block(addr);
1554 return block->host;
1557 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1558 With the exception of the softmmu code in this file, this should
1559 only be used for local memory (e.g. video ram) that the device owns,
1560 and knows it isn't going to access beyond the end of the block.
1562 It should not be used for general purpose DMA.
1563 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1565 void *qemu_get_ram_ptr(ram_addr_t addr)
1567 RAMBlock *block = qemu_get_ram_block(addr);
1569 if (xen_enabled()) {
1570 /* We need to check if the requested address is in the RAM
1571 * because we don't want to map the entire memory in QEMU.
1572 * In that case just map until the end of the page.
1574 if (block->offset == 0) {
1575 return xen_map_cache(addr, 0, 0);
1576 } else if (block->host == NULL) {
1577 block->host =
1578 xen_map_cache(block->offset, block->length, 1);
1581 return block->host + (addr - block->offset);
1584 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1585 * but takes a size argument */
1586 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1588 if (*size == 0) {
1589 return NULL;
1591 if (xen_enabled()) {
1592 return xen_map_cache(addr, *size, 1);
1593 } else {
1594 RAMBlock *block;
1596 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1597 if (addr - block->offset < block->length) {
1598 if (addr - block->offset + *size > block->length)
1599 *size = block->length - addr + block->offset;
1600 return block->host + (addr - block->offset);
1604 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1605 abort();
1609 /* Some of the softmmu routines need to translate from a host pointer
1610 (typically a TLB entry) back to a ram offset. */
1611 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1613 RAMBlock *block;
1614 uint8_t *host = ptr;
1616 if (xen_enabled()) {
1617 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1618 return qemu_get_ram_block(*ram_addr)->mr;
1621 block = ram_list.mru_block;
1622 if (block && block->host && host - block->host < block->length) {
1623 goto found;
1626 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1627 /* This case append when the block is not mapped. */
1628 if (block->host == NULL) {
1629 continue;
1631 if (host - block->host < block->length) {
1632 goto found;
1636 return NULL;
1638 found:
1639 *ram_addr = block->offset + (host - block->host);
1640 return block->mr;
1643 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1644 uint64_t val, unsigned size)
1646 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1647 tb_invalidate_phys_page_fast(ram_addr, size);
1649 switch (size) {
1650 case 1:
1651 stb_p(qemu_get_ram_ptr(ram_addr), val);
1652 break;
1653 case 2:
1654 stw_p(qemu_get_ram_ptr(ram_addr), val);
1655 break;
1656 case 4:
1657 stl_p(qemu_get_ram_ptr(ram_addr), val);
1658 break;
1659 default:
1660 abort();
1662 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1663 /* we remove the notdirty callback only if the code has been
1664 flushed */
1665 if (!cpu_physical_memory_is_clean(ram_addr)) {
1666 CPUArchState *env = current_cpu->env_ptr;
1667 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1671 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1672 unsigned size, bool is_write)
1674 return is_write;
1677 static const MemoryRegionOps notdirty_mem_ops = {
1678 .write = notdirty_mem_write,
1679 .valid.accepts = notdirty_mem_accepts,
1680 .endianness = DEVICE_NATIVE_ENDIAN,
1683 /* Generate a debug exception if a watchpoint has been hit. */
1684 static void check_watchpoint(int offset, int len, int flags)
1686 CPUState *cpu = current_cpu;
1687 CPUArchState *env = cpu->env_ptr;
1688 target_ulong pc, cs_base;
1689 target_ulong vaddr;
1690 CPUWatchpoint *wp;
1691 int cpu_flags;
1693 if (cpu->watchpoint_hit) {
1694 /* We re-entered the check after replacing the TB. Now raise
1695 * the debug interrupt so that is will trigger after the
1696 * current instruction. */
1697 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1698 return;
1700 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1701 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1702 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1703 && (wp->flags & flags)) {
1704 if (flags == BP_MEM_READ) {
1705 wp->flags |= BP_WATCHPOINT_HIT_READ;
1706 } else {
1707 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1709 wp->hitaddr = vaddr;
1710 if (!cpu->watchpoint_hit) {
1711 cpu->watchpoint_hit = wp;
1712 tb_check_watchpoint(cpu);
1713 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1714 cpu->exception_index = EXCP_DEBUG;
1715 cpu_loop_exit(cpu);
1716 } else {
1717 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1718 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1719 cpu_resume_from_signal(cpu, NULL);
1722 } else {
1723 wp->flags &= ~BP_WATCHPOINT_HIT;
1728 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1729 so these check for a hit then pass through to the normal out-of-line
1730 phys routines. */
1731 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1732 unsigned size)
1734 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1735 switch (size) {
1736 case 1: return ldub_phys(&address_space_memory, addr);
1737 case 2: return lduw_phys(&address_space_memory, addr);
1738 case 4: return ldl_phys(&address_space_memory, addr);
1739 default: abort();
1743 static void watch_mem_write(void *opaque, hwaddr addr,
1744 uint64_t val, unsigned size)
1746 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1747 switch (size) {
1748 case 1:
1749 stb_phys(&address_space_memory, addr, val);
1750 break;
1751 case 2:
1752 stw_phys(&address_space_memory, addr, val);
1753 break;
1754 case 4:
1755 stl_phys(&address_space_memory, addr, val);
1756 break;
1757 default: abort();
1761 static const MemoryRegionOps watch_mem_ops = {
1762 .read = watch_mem_read,
1763 .write = watch_mem_write,
1764 .endianness = DEVICE_NATIVE_ENDIAN,
1767 static uint64_t subpage_read(void *opaque, hwaddr addr,
1768 unsigned len)
1770 subpage_t *subpage = opaque;
1771 uint8_t buf[4];
1773 #if defined(DEBUG_SUBPAGE)
1774 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1775 subpage, len, addr);
1776 #endif
1777 address_space_read(subpage->as, addr + subpage->base, buf, len);
1778 switch (len) {
1779 case 1:
1780 return ldub_p(buf);
1781 case 2:
1782 return lduw_p(buf);
1783 case 4:
1784 return ldl_p(buf);
1785 default:
1786 abort();
1790 static void subpage_write(void *opaque, hwaddr addr,
1791 uint64_t value, unsigned len)
1793 subpage_t *subpage = opaque;
1794 uint8_t buf[4];
1796 #if defined(DEBUG_SUBPAGE)
1797 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1798 " value %"PRIx64"\n",
1799 __func__, subpage, len, addr, value);
1800 #endif
1801 switch (len) {
1802 case 1:
1803 stb_p(buf, value);
1804 break;
1805 case 2:
1806 stw_p(buf, value);
1807 break;
1808 case 4:
1809 stl_p(buf, value);
1810 break;
1811 default:
1812 abort();
1814 address_space_write(subpage->as, addr + subpage->base, buf, len);
1817 static bool subpage_accepts(void *opaque, hwaddr addr,
1818 unsigned len, bool is_write)
1820 subpage_t *subpage = opaque;
1821 #if defined(DEBUG_SUBPAGE)
1822 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1823 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1824 #endif
1826 return address_space_access_valid(subpage->as, addr + subpage->base,
1827 len, is_write);
1830 static const MemoryRegionOps subpage_ops = {
1831 .read = subpage_read,
1832 .write = subpage_write,
1833 .valid.accepts = subpage_accepts,
1834 .endianness = DEVICE_NATIVE_ENDIAN,
1837 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1838 uint16_t section)
1840 int idx, eidx;
1842 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1843 return -1;
1844 idx = SUBPAGE_IDX(start);
1845 eidx = SUBPAGE_IDX(end);
1846 #if defined(DEBUG_SUBPAGE)
1847 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1848 __func__, mmio, start, end, idx, eidx, section);
1849 #endif
1850 for (; idx <= eidx; idx++) {
1851 mmio->sub_section[idx] = section;
1854 return 0;
1857 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1859 subpage_t *mmio;
1861 mmio = g_malloc0(sizeof(subpage_t));
1863 mmio->as = as;
1864 mmio->base = base;
1865 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1866 NULL, TARGET_PAGE_SIZE);
1867 mmio->iomem.subpage = true;
1868 #if defined(DEBUG_SUBPAGE)
1869 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1870 mmio, base, TARGET_PAGE_SIZE);
1871 #endif
1872 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1874 return mmio;
1877 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1878 MemoryRegion *mr)
1880 assert(as);
1881 MemoryRegionSection section = {
1882 .address_space = as,
1883 .mr = mr,
1884 .offset_within_address_space = 0,
1885 .offset_within_region = 0,
1886 .size = int128_2_64(),
1889 return phys_section_add(map, &section);
1892 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1894 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1897 static void io_mem_init(void)
1899 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1900 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1901 NULL, UINT64_MAX);
1902 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1903 NULL, UINT64_MAX);
1904 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1905 NULL, UINT64_MAX);
1908 static void mem_begin(MemoryListener *listener)
1910 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1911 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1912 uint16_t n;
1914 n = dummy_section(&d->map, as, &io_mem_unassigned);
1915 assert(n == PHYS_SECTION_UNASSIGNED);
1916 n = dummy_section(&d->map, as, &io_mem_notdirty);
1917 assert(n == PHYS_SECTION_NOTDIRTY);
1918 n = dummy_section(&d->map, as, &io_mem_rom);
1919 assert(n == PHYS_SECTION_ROM);
1920 n = dummy_section(&d->map, as, &io_mem_watch);
1921 assert(n == PHYS_SECTION_WATCH);
1923 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1924 d->as = as;
1925 as->next_dispatch = d;
1928 static void mem_commit(MemoryListener *listener)
1930 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1931 AddressSpaceDispatch *cur = as->dispatch;
1932 AddressSpaceDispatch *next = as->next_dispatch;
1934 phys_page_compact_all(next, next->map.nodes_nb);
1936 as->dispatch = next;
1938 if (cur) {
1939 phys_sections_free(&cur->map);
1940 g_free(cur);
1944 static void tcg_commit(MemoryListener *listener)
1946 CPUState *cpu;
1948 /* since each CPU stores ram addresses in its TLB cache, we must
1949 reset the modified entries */
1950 /* XXX: slow ! */
1951 CPU_FOREACH(cpu) {
1952 /* FIXME: Disentangle the cpu.h circular files deps so we can
1953 directly get the right CPU from listener. */
1954 if (cpu->tcg_as_listener != listener) {
1955 continue;
1957 tlb_flush(cpu, 1);
1961 static void core_log_global_start(MemoryListener *listener)
1963 cpu_physical_memory_set_dirty_tracking(true);
1966 static void core_log_global_stop(MemoryListener *listener)
1968 cpu_physical_memory_set_dirty_tracking(false);
1971 static MemoryListener core_memory_listener = {
1972 .log_global_start = core_log_global_start,
1973 .log_global_stop = core_log_global_stop,
1974 .priority = 1,
1977 void address_space_init_dispatch(AddressSpace *as)
1979 as->dispatch = NULL;
1980 as->dispatch_listener = (MemoryListener) {
1981 .begin = mem_begin,
1982 .commit = mem_commit,
1983 .region_add = mem_add,
1984 .region_nop = mem_add,
1985 .priority = 0,
1987 memory_listener_register(&as->dispatch_listener, as);
1990 void address_space_destroy_dispatch(AddressSpace *as)
1992 AddressSpaceDispatch *d = as->dispatch;
1994 memory_listener_unregister(&as->dispatch_listener);
1995 g_free(d);
1996 as->dispatch = NULL;
1999 static void memory_map_init(void)
2001 system_memory = g_malloc(sizeof(*system_memory));
2003 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2004 address_space_init(&address_space_memory, system_memory, "memory");
2006 system_io = g_malloc(sizeof(*system_io));
2007 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2008 65536);
2009 address_space_init(&address_space_io, system_io, "I/O");
2011 memory_listener_register(&core_memory_listener, &address_space_memory);
2014 MemoryRegion *get_system_memory(void)
2016 return system_memory;
2019 MemoryRegion *get_system_io(void)
2021 return system_io;
2024 #endif /* !defined(CONFIG_USER_ONLY) */
2026 /* physical memory access (slow version, mainly for debug) */
2027 #if defined(CONFIG_USER_ONLY)
2028 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2029 uint8_t *buf, int len, int is_write)
2031 int l, flags;
2032 target_ulong page;
2033 void * p;
2035 while (len > 0) {
2036 page = addr & TARGET_PAGE_MASK;
2037 l = (page + TARGET_PAGE_SIZE) - addr;
2038 if (l > len)
2039 l = len;
2040 flags = page_get_flags(page);
2041 if (!(flags & PAGE_VALID))
2042 return -1;
2043 if (is_write) {
2044 if (!(flags & PAGE_WRITE))
2045 return -1;
2046 /* XXX: this code should not depend on lock_user */
2047 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2048 return -1;
2049 memcpy(p, buf, l);
2050 unlock_user(p, addr, l);
2051 } else {
2052 if (!(flags & PAGE_READ))
2053 return -1;
2054 /* XXX: this code should not depend on lock_user */
2055 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2056 return -1;
2057 memcpy(buf, p, l);
2058 unlock_user(p, addr, 0);
2060 len -= l;
2061 buf += l;
2062 addr += l;
2064 return 0;
2067 #else
2069 static void invalidate_and_set_dirty(hwaddr addr,
2070 hwaddr length)
2072 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2073 tb_invalidate_phys_range(addr, addr + length, 0);
2074 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2076 xen_modified_memory(addr, length);
2079 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2081 unsigned access_size_max = mr->ops->valid.max_access_size;
2083 /* Regions are assumed to support 1-4 byte accesses unless
2084 otherwise specified. */
2085 if (access_size_max == 0) {
2086 access_size_max = 4;
2089 /* Bound the maximum access by the alignment of the address. */
2090 if (!mr->ops->impl.unaligned) {
2091 unsigned align_size_max = addr & -addr;
2092 if (align_size_max != 0 && align_size_max < access_size_max) {
2093 access_size_max = align_size_max;
2097 /* Don't attempt accesses larger than the maximum. */
2098 if (l > access_size_max) {
2099 l = access_size_max;
2101 if (l & (l - 1)) {
2102 l = 1 << (qemu_fls(l) - 1);
2105 return l;
2108 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2109 int len, bool is_write)
2111 hwaddr l;
2112 uint8_t *ptr;
2113 uint64_t val;
2114 hwaddr addr1;
2115 MemoryRegion *mr;
2116 bool error = false;
2118 while (len > 0) {
2119 l = len;
2120 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2122 if (is_write) {
2123 if (!memory_access_is_direct(mr, is_write)) {
2124 l = memory_access_size(mr, l, addr1);
2125 /* XXX: could force current_cpu to NULL to avoid
2126 potential bugs */
2127 switch (l) {
2128 case 8:
2129 /* 64 bit write access */
2130 val = ldq_p(buf);
2131 error |= io_mem_write(mr, addr1, val, 8);
2132 break;
2133 case 4:
2134 /* 32 bit write access */
2135 val = ldl_p(buf);
2136 error |= io_mem_write(mr, addr1, val, 4);
2137 break;
2138 case 2:
2139 /* 16 bit write access */
2140 val = lduw_p(buf);
2141 error |= io_mem_write(mr, addr1, val, 2);
2142 break;
2143 case 1:
2144 /* 8 bit write access */
2145 val = ldub_p(buf);
2146 error |= io_mem_write(mr, addr1, val, 1);
2147 break;
2148 default:
2149 abort();
2151 } else {
2152 addr1 += memory_region_get_ram_addr(mr);
2153 /* RAM case */
2154 ptr = qemu_get_ram_ptr(addr1);
2155 memcpy(ptr, buf, l);
2156 invalidate_and_set_dirty(addr1, l);
2158 } else {
2159 if (!memory_access_is_direct(mr, is_write)) {
2160 /* I/O case */
2161 l = memory_access_size(mr, l, addr1);
2162 switch (l) {
2163 case 8:
2164 /* 64 bit read access */
2165 error |= io_mem_read(mr, addr1, &val, 8);
2166 stq_p(buf, val);
2167 break;
2168 case 4:
2169 /* 32 bit read access */
2170 error |= io_mem_read(mr, addr1, &val, 4);
2171 stl_p(buf, val);
2172 break;
2173 case 2:
2174 /* 16 bit read access */
2175 error |= io_mem_read(mr, addr1, &val, 2);
2176 stw_p(buf, val);
2177 break;
2178 case 1:
2179 /* 8 bit read access */
2180 error |= io_mem_read(mr, addr1, &val, 1);
2181 stb_p(buf, val);
2182 break;
2183 default:
2184 abort();
2186 } else {
2187 /* RAM case */
2188 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2189 memcpy(buf, ptr, l);
2192 len -= l;
2193 buf += l;
2194 addr += l;
2197 return error;
2200 bool address_space_write(AddressSpace *as, hwaddr addr,
2201 const uint8_t *buf, int len)
2203 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2206 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2208 return address_space_rw(as, addr, buf, len, false);
2212 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2213 int len, int is_write)
2215 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM and mark it dirty */
    FLUSH_CACHE,    /* flush the host instruction cache over the range */
};
2223 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2224 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2226 hwaddr l;
2227 uint8_t *ptr;
2228 hwaddr addr1;
2229 MemoryRegion *mr;
2231 while (len > 0) {
2232 l = len;
2233 mr = address_space_translate(as, addr, &addr1, &l, true);
2235 if (!(memory_region_is_ram(mr) ||
2236 memory_region_is_romd(mr))) {
2237 /* do nothing */
2238 } else {
2239 addr1 += memory_region_get_ram_addr(mr);
2240 /* ROM/RAM case */
2241 ptr = qemu_get_ram_ptr(addr1);
2242 switch (type) {
2243 case WRITE_DATA:
2244 memcpy(ptr, buf, l);
2245 invalidate_and_set_dirty(addr1, l);
2246 break;
2247 case FLUSH_CACHE:
2248 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2249 break;
2252 len -= l;
2253 buf += l;
2254 addr += l;
2258 /* used for ROM loading : can write in RAM and ROM */
2259 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2260 const uint8_t *buf, int len)
2262 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2265 void cpu_flush_icache_range(hwaddr start, int len)
2268 * This function should do the same thing as an icache flush that was
2269 * triggered from within the guest. For TCG we are always cache coherent,
2270 * so there is no need to flush anything. For KVM / Xen we need to flush
2271 * the host's instruction cache at least.
2273 if (tcg_enabled()) {
2274 return;
2277 cpu_physical_memory_write_rom_internal(&address_space_memory,
2278 start, NULL, len, FLUSH_CACHE);
2281 typedef struct {
2282 MemoryRegion *mr;
2283 void *buffer;
2284 hwaddr addr;
2285 hwaddr len;
2286 } BounceBuffer;
2288 static BounceBuffer bounce;
2290 typedef struct MapClient {
2291 void *opaque;
2292 void (*callback)(void *opaque);
2293 QLIST_ENTRY(MapClient) link;
2294 } MapClient;
2296 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2297 = QLIST_HEAD_INITIALIZER(map_client_list);
2299 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2301 MapClient *client = g_malloc(sizeof(*client));
2303 client->opaque = opaque;
2304 client->callback = callback;
2305 QLIST_INSERT_HEAD(&map_client_list, client, link);
2306 return client;
2309 static void cpu_unregister_map_client(void *_client)
2311 MapClient *client = (MapClient *)_client;
2313 QLIST_REMOVE(client, link);
2314 g_free(client);
2317 static void cpu_notify_map_clients(void)
2319 MapClient *client;
2321 while (!QLIST_EMPTY(&map_client_list)) {
2322 client = QLIST_FIRST(&map_client_list);
2323 client->callback(client->opaque);
2324 cpu_unregister_map_client(client);
2328 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2330 MemoryRegion *mr;
2331 hwaddr l, xlat;
2333 while (len > 0) {
2334 l = len;
2335 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2336 if (!memory_access_is_direct(mr, is_write)) {
2337 l = memory_access_size(mr, l, addr);
2338 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2339 return false;
2343 len -= l;
2344 addr += l;
2346 return true;
2349 /* Map a physical memory region into a host virtual address.
2350 * May map a subset of the requested range, given by and returned in *plen.
2351 * May return NULL if resources needed to perform the mapping are exhausted.
2352 * Use only for reads OR writes - not for read-modify-write operations.
2353 * Use cpu_register_map_client() to know when retrying the map operation is
2354 * likely to succeed.
2356 void *address_space_map(AddressSpace *as,
2357 hwaddr addr,
2358 hwaddr *plen,
2359 bool is_write)
2361 hwaddr len = *plen;
2362 hwaddr done = 0;
2363 hwaddr l, xlat, base;
2364 MemoryRegion *mr, *this_mr;
2365 ram_addr_t raddr;
2367 if (len == 0) {
2368 return NULL;
2371 l = len;
2372 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2373 if (!memory_access_is_direct(mr, is_write)) {
2374 if (bounce.buffer) {
2375 return NULL;
2377 /* Avoid unbounded allocations */
2378 l = MIN(l, TARGET_PAGE_SIZE);
2379 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2380 bounce.addr = addr;
2381 bounce.len = l;
2383 memory_region_ref(mr);
2384 bounce.mr = mr;
2385 if (!is_write) {
2386 address_space_read(as, addr, bounce.buffer, l);
2389 *plen = l;
2390 return bounce.buffer;
2393 base = xlat;
2394 raddr = memory_region_get_ram_addr(mr);
2396 for (;;) {
2397 len -= l;
2398 addr += l;
2399 done += l;
2400 if (len == 0) {
2401 break;
2404 l = len;
2405 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2406 if (this_mr != mr || xlat != base + done) {
2407 break;
2411 memory_region_ref(mr);
2412 *plen = done;
2413 return qemu_ram_ptr_length(raddr + base, plen);
2416 /* Unmaps a memory region previously mapped by address_space_map().
2417 * Will also mark the memory as dirty if is_write == 1. access_len gives
2418 * the amount of memory that was actually read or written by the caller.
2420 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2421 int is_write, hwaddr access_len)
2423 if (buffer != bounce.buffer) {
2424 MemoryRegion *mr;
2425 ram_addr_t addr1;
2427 mr = qemu_ram_addr_from_host(buffer, &addr1);
2428 assert(mr != NULL);
2429 if (is_write) {
2430 invalidate_and_set_dirty(addr1, access_len);
2432 if (xen_enabled()) {
2433 xen_invalidate_map_cache_entry(buffer);
2435 memory_region_unref(mr);
2436 return;
2438 if (is_write) {
2439 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2441 qemu_vfree(bounce.buffer);
2442 bounce.buffer = NULL;
2443 memory_region_unref(bounce.mr);
2444 cpu_notify_map_clients();
2447 void *cpu_physical_memory_map(hwaddr addr,
2448 hwaddr *plen,
2449 int is_write)
2451 return address_space_map(&address_space_memory, addr, plen, is_write);
2454 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2455 int is_write, hwaddr access_len)
2457 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2460 /* warning: addr must be aligned */
2461 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2462 enum device_endian endian)
2464 uint8_t *ptr;
2465 uint64_t val;
2466 MemoryRegion *mr;
2467 hwaddr l = 4;
2468 hwaddr addr1;
2470 mr = address_space_translate(as, addr, &addr1, &l, false);
2471 if (l < 4 || !memory_access_is_direct(mr, false)) {
2472 /* I/O case */
2473 io_mem_read(mr, addr1, &val, 4);
2474 #if defined(TARGET_WORDS_BIGENDIAN)
2475 if (endian == DEVICE_LITTLE_ENDIAN) {
2476 val = bswap32(val);
2478 #else
2479 if (endian == DEVICE_BIG_ENDIAN) {
2480 val = bswap32(val);
2482 #endif
2483 } else {
2484 /* RAM case */
2485 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2486 & TARGET_PAGE_MASK)
2487 + addr1);
2488 switch (endian) {
2489 case DEVICE_LITTLE_ENDIAN:
2490 val = ldl_le_p(ptr);
2491 break;
2492 case DEVICE_BIG_ENDIAN:
2493 val = ldl_be_p(ptr);
2494 break;
2495 default:
2496 val = ldl_p(ptr);
2497 break;
2500 return val;
2503 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2505 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2508 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2510 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2513 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2515 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2518 /* warning: addr must be aligned */
2519 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2520 enum device_endian endian)
2522 uint8_t *ptr;
2523 uint64_t val;
2524 MemoryRegion *mr;
2525 hwaddr l = 8;
2526 hwaddr addr1;
2528 mr = address_space_translate(as, addr, &addr1, &l,
2529 false);
2530 if (l < 8 || !memory_access_is_direct(mr, false)) {
2531 /* I/O case */
2532 io_mem_read(mr, addr1, &val, 8);
2533 #if defined(TARGET_WORDS_BIGENDIAN)
2534 if (endian == DEVICE_LITTLE_ENDIAN) {
2535 val = bswap64(val);
2537 #else
2538 if (endian == DEVICE_BIG_ENDIAN) {
2539 val = bswap64(val);
2541 #endif
2542 } else {
2543 /* RAM case */
2544 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2545 & TARGET_PAGE_MASK)
2546 + addr1);
2547 switch (endian) {
2548 case DEVICE_LITTLE_ENDIAN:
2549 val = ldq_le_p(ptr);
2550 break;
2551 case DEVICE_BIG_ENDIAN:
2552 val = ldq_be_p(ptr);
2553 break;
2554 default:
2555 val = ldq_p(ptr);
2556 break;
2559 return val;
2562 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2564 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2567 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2569 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2572 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2574 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2577 /* XXX: optimize */
2578 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2580 uint8_t val;
2581 address_space_rw(as, addr, &val, 1, 0);
2582 return val;
2585 /* warning: addr must be aligned */
2586 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2587 enum device_endian endian)
2589 uint8_t *ptr;
2590 uint64_t val;
2591 MemoryRegion *mr;
2592 hwaddr l = 2;
2593 hwaddr addr1;
2595 mr = address_space_translate(as, addr, &addr1, &l,
2596 false);
2597 if (l < 2 || !memory_access_is_direct(mr, false)) {
2598 /* I/O case */
2599 io_mem_read(mr, addr1, &val, 2);
2600 #if defined(TARGET_WORDS_BIGENDIAN)
2601 if (endian == DEVICE_LITTLE_ENDIAN) {
2602 val = bswap16(val);
2604 #else
2605 if (endian == DEVICE_BIG_ENDIAN) {
2606 val = bswap16(val);
2608 #endif
2609 } else {
2610 /* RAM case */
2611 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2612 & TARGET_PAGE_MASK)
2613 + addr1);
2614 switch (endian) {
2615 case DEVICE_LITTLE_ENDIAN:
2616 val = lduw_le_p(ptr);
2617 break;
2618 case DEVICE_BIG_ENDIAN:
2619 val = lduw_be_p(ptr);
2620 break;
2621 default:
2622 val = lduw_p(ptr);
2623 break;
2626 return val;
2629 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2631 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2634 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2636 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2639 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2641 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2644 /* warning: addr must be aligned. The ram page is not masked as dirty
2645 and the code inside is not invalidated. It is useful if the dirty
2646 bits are used to track modified PTEs */
2647 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2649 uint8_t *ptr;
2650 MemoryRegion *mr;
2651 hwaddr l = 4;
2652 hwaddr addr1;
2654 mr = address_space_translate(as, addr, &addr1, &l,
2655 true);
2656 if (l < 4 || !memory_access_is_direct(mr, true)) {
2657 io_mem_write(mr, addr1, val, 4);
2658 } else {
2659 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2660 ptr = qemu_get_ram_ptr(addr1);
2661 stl_p(ptr, val);
2663 if (unlikely(in_migration)) {
2664 if (cpu_physical_memory_is_clean(addr1)) {
2665 /* invalidate code */
2666 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2667 /* set dirty bit */
2668 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2674 /* warning: addr must be aligned */
2675 static inline void stl_phys_internal(AddressSpace *as,
2676 hwaddr addr, uint32_t val,
2677 enum device_endian endian)
2679 uint8_t *ptr;
2680 MemoryRegion *mr;
2681 hwaddr l = 4;
2682 hwaddr addr1;
2684 mr = address_space_translate(as, addr, &addr1, &l,
2685 true);
2686 if (l < 4 || !memory_access_is_direct(mr, true)) {
2687 #if defined(TARGET_WORDS_BIGENDIAN)
2688 if (endian == DEVICE_LITTLE_ENDIAN) {
2689 val = bswap32(val);
2691 #else
2692 if (endian == DEVICE_BIG_ENDIAN) {
2693 val = bswap32(val);
2695 #endif
2696 io_mem_write(mr, addr1, val, 4);
2697 } else {
2698 /* RAM case */
2699 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2700 ptr = qemu_get_ram_ptr(addr1);
2701 switch (endian) {
2702 case DEVICE_LITTLE_ENDIAN:
2703 stl_le_p(ptr, val);
2704 break;
2705 case DEVICE_BIG_ENDIAN:
2706 stl_be_p(ptr, val);
2707 break;
2708 default:
2709 stl_p(ptr, val);
2710 break;
2712 invalidate_and_set_dirty(addr1, 4);
2716 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2718 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2721 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2723 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2726 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2728 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2731 /* XXX: optimize */
2732 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2734 uint8_t v = val;
2735 address_space_rw(as, addr, &v, 1, 1);
2738 /* warning: addr must be aligned */
2739 static inline void stw_phys_internal(AddressSpace *as,
2740 hwaddr addr, uint32_t val,
2741 enum device_endian endian)
2743 uint8_t *ptr;
2744 MemoryRegion *mr;
2745 hwaddr l = 2;
2746 hwaddr addr1;
2748 mr = address_space_translate(as, addr, &addr1, &l, true);
2749 if (l < 2 || !memory_access_is_direct(mr, true)) {
2750 #if defined(TARGET_WORDS_BIGENDIAN)
2751 if (endian == DEVICE_LITTLE_ENDIAN) {
2752 val = bswap16(val);
2754 #else
2755 if (endian == DEVICE_BIG_ENDIAN) {
2756 val = bswap16(val);
2758 #endif
2759 io_mem_write(mr, addr1, val, 2);
2760 } else {
2761 /* RAM case */
2762 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2763 ptr = qemu_get_ram_ptr(addr1);
2764 switch (endian) {
2765 case DEVICE_LITTLE_ENDIAN:
2766 stw_le_p(ptr, val);
2767 break;
2768 case DEVICE_BIG_ENDIAN:
2769 stw_be_p(ptr, val);
2770 break;
2771 default:
2772 stw_p(ptr, val);
2773 break;
2775 invalidate_and_set_dirty(addr1, 2);
2779 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2781 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2784 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2786 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2789 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2791 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2794 /* XXX: optimize */
2795 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2797 val = tswap64(val);
2798 address_space_rw(as, addr, (void *) &val, 8, 1);
2801 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2803 val = cpu_to_le64(val);
2804 address_space_rw(as, addr, (void *) &val, 8, 1);
2807 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2809 val = cpu_to_be64(val);
2810 address_space_rw(as, addr, (void *) &val, 8, 1);
2813 /* virtual memory access for debug (includes writing to ROM) */
2814 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2815 uint8_t *buf, int len, int is_write)
2817 int l;
2818 hwaddr phys_addr;
2819 target_ulong page;
2821 while (len > 0) {
2822 page = addr & TARGET_PAGE_MASK;
2823 phys_addr = cpu_get_phys_page_debug(cpu, page);
2824 /* if no physical page mapped, return an error */
2825 if (phys_addr == -1)
2826 return -1;
2827 l = (page + TARGET_PAGE_SIZE) - addr;
2828 if (l > len)
2829 l = len;
2830 phys_addr += (addr & ~TARGET_PAGE_MASK);
2831 if (is_write) {
2832 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2833 } else {
2834 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2836 len -= l;
2837 buf += l;
2838 addr += l;
2840 return 0;
2842 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
    bool big_endian = false;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#endif
    return big_endian;
}
2858 #ifndef CONFIG_USER_ONLY
2859 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2861 MemoryRegion*mr;
2862 hwaddr l = 1;
2864 mr = address_space_translate(&address_space_memory,
2865 phys_addr, &phys_addr, &l, false);
2867 return !(memory_region_is_ram(mr) ||
2868 memory_region_is_romd(mr));
2871 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2873 RAMBlock *block;
2875 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2876 func(block->host, block->offset, block->length, opaque);
2879 #endif