Merge remote-tracking branch 'remotes/otubo/tags/pull-seccomp-20150105' into staging
[qemu/ar7.git] / exec.c
blob9c3f3047d38c1038f742bb1186828dbdb6ffedcd
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
78 #endif
80 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
81 /* current CPU in the current thread. It is only valid inside
82 cpu_exec() */
83 DEFINE_TLS(CPUState *, current_cpu);
84 /* 0 = Do not count executed instructions.
85 1 = Precise instruction counting.
86 2 = Adaptive rate instruction counting. */
87 int use_icount;
89 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the multi-level physical page map; packed into 32 bits. */
struct PhysPageEntry {
    /* Number of levels to skip to reach the next node (in units of
     * L2_SIZE); 0 marks a leaf.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Interior (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
100 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
102 /* Size of the L2 (and L3, etc) page tables. */
103 #define ADDR_SPACE_BITS 64
105 #define P_L2_BITS 9
106 #define P_L2_SIZE (1 << P_L2_BITS)
108 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
110 typedef PhysPageEntry Node[P_L2_SIZE];
112 typedef struct PhysPageMap {
113 unsigned sections_nb;
114 unsigned sections_nb_alloc;
115 unsigned nodes_nb;
116 unsigned nodes_nb_alloc;
117 Node *nodes;
118 MemoryRegionSection *sections;
119 } PhysPageMap;
121 struct AddressSpaceDispatch {
122 /* This is a multi-level map on the physical address space.
123 * The bottom level has pointers to MemoryRegionSections.
125 PhysPageEntry phys_map;
126 PhysPageMap map;
127 AddressSpace *as;
130 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
131 typedef struct subpage_t {
132 MemoryRegion iomem;
133 AddressSpace *as;
134 hwaddr base;
135 uint16_t sub_section[TARGET_PAGE_SIZE];
136 } subpage_t;
138 #define PHYS_SECTION_UNASSIGNED 0
139 #define PHYS_SECTION_NOTDIRTY 1
140 #define PHYS_SECTION_ROM 2
141 #define PHYS_SECTION_WATCH 3
143 static void io_mem_init(void);
144 static void memory_map_init(void);
145 static void tcg_commit(MemoryListener *listener);
147 static MemoryRegion io_mem_watch;
148 #endif
150 #if !defined(CONFIG_USER_ONLY)
152 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
154 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
155 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
156 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
157 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
161 static uint32_t phys_map_node_alloc(PhysPageMap *map)
163 unsigned i;
164 uint32_t ret;
166 ret = map->nodes_nb++;
167 assert(ret != PHYS_MAP_NODE_NIL);
168 assert(ret != map->nodes_nb_alloc);
169 for (i = 0; i < P_L2_SIZE; ++i) {
170 map->nodes[ret][i].skip = 1;
171 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
173 return ret;
176 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
177 hwaddr *index, hwaddr *nb, uint16_t leaf,
178 int level)
180 PhysPageEntry *p;
181 int i;
182 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
184 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
185 lp->ptr = phys_map_node_alloc(map);
186 p = map->nodes[lp->ptr];
187 if (level == 0) {
188 for (i = 0; i < P_L2_SIZE; i++) {
189 p[i].skip = 0;
190 p[i].ptr = PHYS_SECTION_UNASSIGNED;
193 } else {
194 p = map->nodes[lp->ptr];
196 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
198 while (*nb && lp < &p[P_L2_SIZE]) {
199 if ((*index & (step - 1)) == 0 && *nb >= step) {
200 lp->skip = 0;
201 lp->ptr = leaf;
202 *index += step;
203 *nb -= step;
204 } else {
205 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
207 ++lp;
211 static void phys_page_set(AddressSpaceDispatch *d,
212 hwaddr index, hwaddr nb,
213 uint16_t leaf)
215 /* Wildly overreserve - it doesn't matter much. */
216 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
218 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
221 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
222 * and update our entry so we can skip it and go directly to the destination.
224 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
226 unsigned valid_ptr = P_L2_SIZE;
227 int valid = 0;
228 PhysPageEntry *p;
229 int i;
231 if (lp->ptr == PHYS_MAP_NODE_NIL) {
232 return;
235 p = nodes[lp->ptr];
236 for (i = 0; i < P_L2_SIZE; i++) {
237 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
238 continue;
241 valid_ptr = i;
242 valid++;
243 if (p[i].skip) {
244 phys_page_compact(&p[i], nodes, compacted);
248 /* We can only compress if there's only one child. */
249 if (valid != 1) {
250 return;
253 assert(valid_ptr < P_L2_SIZE);
255 /* Don't compress if it won't fit in the # of bits we have. */
256 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
257 return;
260 lp->ptr = p[valid_ptr].ptr;
261 if (!p[valid_ptr].skip) {
262 /* If our only child is a leaf, make this a leaf. */
263 /* By design, we should have made this node a leaf to begin with so we
264 * should never reach here.
265 * But since it's so simple to handle this, let's do it just in case we
266 * change this rule.
268 lp->skip = 0;
269 } else {
270 lp->skip += p[valid_ptr].skip;
274 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
276 DECLARE_BITMAP(compacted, nodes_nb);
278 if (d->phys_map.skip) {
279 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
283 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
284 Node *nodes, MemoryRegionSection *sections)
286 PhysPageEntry *p;
287 hwaddr index = addr >> TARGET_PAGE_BITS;
288 int i;
290 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
291 if (lp.ptr == PHYS_MAP_NODE_NIL) {
292 return &sections[PHYS_SECTION_UNASSIGNED];
294 p = nodes[lp.ptr];
295 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
298 if (sections[lp.ptr].size.hi ||
299 range_covers_byte(sections[lp.ptr].offset_within_address_space,
300 sections[lp.ptr].size.lo, addr)) {
301 return &sections[lp.ptr];
302 } else {
303 return &sections[PHYS_SECTION_UNASSIGNED];
307 bool memory_region_is_unassigned(MemoryRegion *mr)
309 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
310 && mr != &io_mem_watch;
313 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
314 hwaddr addr,
315 bool resolve_subpage)
317 MemoryRegionSection *section;
318 subpage_t *subpage;
320 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
321 if (resolve_subpage && section->mr->subpage) {
322 subpage = container_of(section->mr, subpage_t, iomem);
323 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
325 return section;
328 static MemoryRegionSection *
329 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
330 hwaddr *plen, bool resolve_subpage)
332 MemoryRegionSection *section;
333 Int128 diff;
335 section = address_space_lookup_region(d, addr, resolve_subpage);
336 /* Compute offset within MemoryRegionSection */
337 addr -= section->offset_within_address_space;
339 /* Compute offset within MemoryRegion */
340 *xlat = addr + section->offset_within_region;
342 diff = int128_sub(section->mr->size, int128_make64(addr));
343 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
344 return section;
347 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
349 if (memory_region_is_ram(mr)) {
350 return !(is_write && mr->readonly);
352 if (memory_region_is_romd(mr)) {
353 return !is_write;
356 return false;
359 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
360 hwaddr *xlat, hwaddr *plen,
361 bool is_write)
363 IOMMUTLBEntry iotlb;
364 MemoryRegionSection *section;
365 MemoryRegion *mr;
366 hwaddr len = *plen;
368 for (;;) {
369 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
370 mr = section->mr;
372 if (!mr->iommu_ops) {
373 break;
376 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
377 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
378 | (addr & iotlb.addr_mask));
379 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
380 if (!(iotlb.perm & (1 << is_write))) {
381 mr = &io_mem_unassigned;
382 break;
385 as = iotlb.target_as;
388 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
389 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
390 len = MIN(page, len);
393 *plen = len;
394 *xlat = addr;
395 return mr;
398 MemoryRegionSection *
399 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
400 hwaddr *plen)
402 MemoryRegionSection *section;
403 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
405 assert(!section->mr->iommu_ops);
406 return section;
408 #endif
410 void cpu_exec_init_all(void)
412 #if !defined(CONFIG_USER_ONLY)
413 qemu_mutex_init(&ram_list.mutex);
414 memory_map_init();
415 io_mem_init();
416 #endif
419 #if !defined(CONFIG_USER_ONLY)
421 static int cpu_common_post_load(void *opaque, int version_id)
423 CPUState *cpu = opaque;
425 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
426 version_id is increased. */
427 cpu->interrupt_request &= ~0x01;
428 tlb_flush(cpu, 1);
430 return 0;
433 static int cpu_common_pre_load(void *opaque)
435 CPUState *cpu = opaque;
437 cpu->exception_index = -1;
439 return 0;
442 static bool cpu_common_exception_index_needed(void *opaque)
444 CPUState *cpu = opaque;
446 return tcg_enabled() && cpu->exception_index != -1;
449 static const VMStateDescription vmstate_cpu_common_exception_index = {
450 .name = "cpu_common/exception_index",
451 .version_id = 1,
452 .minimum_version_id = 1,
453 .fields = (VMStateField[]) {
454 VMSTATE_INT32(exception_index, CPUState),
455 VMSTATE_END_OF_LIST()
459 const VMStateDescription vmstate_cpu_common = {
460 .name = "cpu_common",
461 .version_id = 1,
462 .minimum_version_id = 1,
463 .pre_load = cpu_common_pre_load,
464 .post_load = cpu_common_post_load,
465 .fields = (VMStateField[]) {
466 VMSTATE_UINT32(halted, CPUState),
467 VMSTATE_UINT32(interrupt_request, CPUState),
468 VMSTATE_END_OF_LIST()
470 .subsections = (VMStateSubsection[]) {
472 .vmsd = &vmstate_cpu_common_exception_index,
473 .needed = cpu_common_exception_index_needed,
474 } , {
475 /* empty */
480 #endif
482 CPUState *qemu_get_cpu(int index)
484 CPUState *cpu;
486 CPU_FOREACH(cpu) {
487 if (cpu->cpu_index == index) {
488 return cpu;
492 return NULL;
495 #if !defined(CONFIG_USER_ONLY)
496 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
498 /* We only support one address space per cpu at the moment. */
499 assert(cpu->as == as);
501 if (cpu->tcg_as_listener) {
502 memory_listener_unregister(cpu->tcg_as_listener);
503 } else {
504 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
506 cpu->tcg_as_listener->commit = tcg_commit;
507 memory_listener_register(cpu->tcg_as_listener, as);
509 #endif
511 void cpu_exec_init(CPUArchState *env)
513 CPUState *cpu = ENV_GET_CPU(env);
514 CPUClass *cc = CPU_GET_CLASS(cpu);
515 CPUState *some_cpu;
516 int cpu_index;
518 #if defined(CONFIG_USER_ONLY)
519 cpu_list_lock();
520 #endif
521 cpu_index = 0;
522 CPU_FOREACH(some_cpu) {
523 cpu_index++;
525 cpu->cpu_index = cpu_index;
526 cpu->numa_node = 0;
527 QTAILQ_INIT(&cpu->breakpoints);
528 QTAILQ_INIT(&cpu->watchpoints);
529 #ifndef CONFIG_USER_ONLY
530 cpu->as = &address_space_memory;
531 cpu->thread_id = qemu_get_thread_id();
532 #endif
533 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
534 #if defined(CONFIG_USER_ONLY)
535 cpu_list_unlock();
536 #endif
537 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
538 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
540 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
541 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
542 cpu_save, cpu_load, env);
543 assert(cc->vmsd == NULL);
544 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
545 #endif
546 if (cc->vmsd != NULL) {
547 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate the translated code covering the byte at @pc. */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/* Invalidate the translated code at guest virtual address @pc, if the
 * address currently has a physical mapping.
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr page = cpu_get_phys_page_debug(cpu, pc);

    if (page == -1) {
        return;
    }
    tb_invalidate_phys_addr(cpu->as,
                            page | (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */
569 #if defined(CONFIG_USER_ONLY)
570 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
575 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
576 int flags)
578 return -ENOSYS;
581 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
585 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
586 int flags, CPUWatchpoint **watchpoint)
588 return -ENOSYS;
590 #else
591 /* Add a watchpoint. */
592 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
593 int flags, CPUWatchpoint **watchpoint)
595 CPUWatchpoint *wp;
597 /* forbid ranges which are empty or run off the end of the address space */
598 if (len == 0 || (addr + len - 1) < addr) {
599 error_report("tried to set invalid watchpoint at %"
600 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
601 return -EINVAL;
603 wp = g_malloc(sizeof(*wp));
605 wp->vaddr = addr;
606 wp->len = len;
607 wp->flags = flags;
609 /* keep all GDB-injected watchpoints in front */
610 if (flags & BP_GDB) {
611 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
612 } else {
613 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
616 tlb_flush_page(cpu, addr);
618 if (watchpoint)
619 *watchpoint = wp;
620 return 0;
623 /* Remove a specific watchpoint. */
624 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
625 int flags)
627 CPUWatchpoint *wp;
629 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
630 if (addr == wp->vaddr && len == wp->len
631 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
632 cpu_watchpoint_remove_by_ref(cpu, wp);
633 return 0;
636 return -ENOENT;
639 /* Remove a specific watchpoint by reference. */
640 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
642 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
644 tlb_flush_page(cpu, watchpoint->vaddr);
646 g_free(watchpoint);
649 /* Remove all matching watchpoints. */
650 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
652 CPUWatchpoint *wp, *next;
654 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
655 if (wp->flags & mask) {
656 cpu_watchpoint_remove_by_ref(cpu, wp);
661 /* Return true if this watchpoint address matches the specified
662 * access (ie the address range covered by the watchpoint overlaps
663 * partially or completely with the address range covered by the
664 * access).
666 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
667 vaddr addr,
668 vaddr len)
670 /* We know the lengths are non-zero, but a little caution is
671 * required to avoid errors in the case where the range ends
672 * exactly at the top of the address space and so addr + len
673 * wraps round to zero.
675 vaddr wpend = wp->vaddr + wp->len - 1;
676 vaddr addrend = addr + len - 1;
678 return !(addr > wpend || wp->vaddr > addrend);
681 #endif
683 /* Add a breakpoint. */
684 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
685 CPUBreakpoint **breakpoint)
687 #if defined(TARGET_HAS_ICE)
688 CPUBreakpoint *bp;
690 bp = g_malloc(sizeof(*bp));
692 bp->pc = pc;
693 bp->flags = flags;
695 /* keep all GDB-injected breakpoints in front */
696 if (flags & BP_GDB) {
697 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
698 } else {
699 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
702 breakpoint_invalidate(cpu, pc);
704 if (breakpoint) {
705 *breakpoint = bp;
707 return 0;
708 #else
709 return -ENOSYS;
710 #endif
713 /* Remove a specific breakpoint. */
714 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
716 #if defined(TARGET_HAS_ICE)
717 CPUBreakpoint *bp;
719 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
720 if (bp->pc == pc && bp->flags == flags) {
721 cpu_breakpoint_remove_by_ref(cpu, bp);
722 return 0;
725 return -ENOENT;
726 #else
727 return -ENOSYS;
728 #endif
731 /* Remove a specific breakpoint by reference. */
732 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
734 #if defined(TARGET_HAS_ICE)
735 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
737 breakpoint_invalidate(cpu, breakpoint->pc);
739 g_free(breakpoint);
740 #endif
743 /* Remove all matching breakpoints. */
744 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
746 #if defined(TARGET_HAS_ICE)
747 CPUBreakpoint *bp, *next;
749 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
750 if (bp->flags & mask) {
751 cpu_breakpoint_remove_by_ref(cpu, bp);
754 #endif
757 /* enable or disable single step mode. EXCP_DEBUG is returned by the
758 CPU loop after each instruction */
759 void cpu_single_step(CPUState *cpu, int enabled)
761 #if defined(TARGET_HAS_ICE)
762 if (cpu->singlestep_enabled != enabled) {
763 cpu->singlestep_enabled = enabled;
764 if (kvm_enabled()) {
765 kvm_update_guest_debug(cpu, 0);
766 } else {
767 /* must flush all the translated code to avoid inconsistencies */
768 /* XXX: only flush what is necessary */
769 CPUArchState *env = cpu->env_ptr;
770 tb_flush(env);
773 #endif
776 void cpu_abort(CPUState *cpu, const char *fmt, ...)
778 va_list ap;
779 va_list ap2;
781 va_start(ap, fmt);
782 va_copy(ap2, ap);
783 fprintf(stderr, "qemu: fatal: ");
784 vfprintf(stderr, fmt, ap);
785 fprintf(stderr, "\n");
786 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
787 if (qemu_log_enabled()) {
788 qemu_log("qemu: fatal: ");
789 qemu_log_vprintf(fmt, ap2);
790 qemu_log("\n");
791 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
792 qemu_log_flush();
793 qemu_log_close();
795 va_end(ap2);
796 va_end(ap);
797 #if defined(CONFIG_USER_ONLY)
799 struct sigaction act;
800 sigfillset(&act.sa_mask);
801 act.sa_handler = SIG_DFL;
802 sigaction(SIGABRT, &act, NULL);
804 #endif
805 abort();
808 #if !defined(CONFIG_USER_ONLY)
809 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
811 RAMBlock *block;
813 /* The list is protected by the iothread lock here. */
814 block = ram_list.mru_block;
815 if (block && addr - block->offset < block->length) {
816 goto found;
818 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
819 if (addr - block->offset < block->length) {
820 goto found;
824 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
825 abort();
827 found:
828 ram_list.mru_block = block;
829 return block;
832 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
834 ram_addr_t start1;
835 RAMBlock *block;
836 ram_addr_t end;
838 end = TARGET_PAGE_ALIGN(start + length);
839 start &= TARGET_PAGE_MASK;
841 block = qemu_get_ram_block(start);
842 assert(block == qemu_get_ram_block(end - 1));
843 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
844 cpu_tlb_reset_dirty_all(start1, length);
847 /* Note: start and end must be within the same ram block. */
848 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
849 unsigned client)
851 if (length == 0)
852 return;
853 cpu_physical_memory_clear_dirty_range(start, length, client);
855 if (tcg_enabled()) {
856 tlb_reset_dirty_range_all(start, length);
860 static void cpu_physical_memory_set_dirty_tracking(bool enable)
862 in_migration = enable;
865 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
866 MemoryRegionSection *section,
867 target_ulong vaddr,
868 hwaddr paddr, hwaddr xlat,
869 int prot,
870 target_ulong *address)
872 hwaddr iotlb;
873 CPUWatchpoint *wp;
875 if (memory_region_is_ram(section->mr)) {
876 /* Normal RAM. */
877 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
878 + xlat;
879 if (!section->readonly) {
880 iotlb |= PHYS_SECTION_NOTDIRTY;
881 } else {
882 iotlb |= PHYS_SECTION_ROM;
884 } else {
885 iotlb = section - section->address_space->dispatch->map.sections;
886 iotlb += xlat;
889 /* Make accesses to pages with watchpoints go via the
890 watchpoint trap routines. */
891 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
892 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
893 /* Avoid trapping reads of pages with a write breakpoint. */
894 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
895 iotlb = PHYS_SECTION_WATCH + paddr;
896 *address |= TLB_MMIO;
897 break;
902 return iotlb;
904 #endif /* defined(CONFIG_USER_ONLY) */
906 #if !defined(CONFIG_USER_ONLY)
908 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
909 uint16_t section);
910 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
912 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
913 qemu_anon_ram_alloc;
916 * Set a custom physical guest memory alloator.
917 * Accelerators with unusual needs may need this. Hopefully, we can
918 * get rid of it eventually.
920 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
922 phys_mem_alloc = alloc;
925 static uint16_t phys_section_add(PhysPageMap *map,
926 MemoryRegionSection *section)
928 /* The physical section number is ORed with a page-aligned
929 * pointer to produce the iotlb entries. Thus it should
930 * never overflow into the page-aligned value.
932 assert(map->sections_nb < TARGET_PAGE_SIZE);
934 if (map->sections_nb == map->sections_nb_alloc) {
935 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
936 map->sections = g_renew(MemoryRegionSection, map->sections,
937 map->sections_nb_alloc);
939 map->sections[map->sections_nb] = *section;
940 memory_region_ref(section->mr);
941 return map->sections_nb++;
944 static void phys_section_destroy(MemoryRegion *mr)
946 memory_region_unref(mr);
948 if (mr->subpage) {
949 subpage_t *subpage = container_of(mr, subpage_t, iomem);
950 object_unref(OBJECT(&subpage->iomem));
951 g_free(subpage);
955 static void phys_sections_free(PhysPageMap *map)
957 while (map->sections_nb > 0) {
958 MemoryRegionSection *section = &map->sections[--map->sections_nb];
959 phys_section_destroy(section->mr);
961 g_free(map->sections);
962 g_free(map->nodes);
965 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
967 subpage_t *subpage;
968 hwaddr base = section->offset_within_address_space
969 & TARGET_PAGE_MASK;
970 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
971 d->map.nodes, d->map.sections);
972 MemoryRegionSection subsection = {
973 .offset_within_address_space = base,
974 .size = int128_make64(TARGET_PAGE_SIZE),
976 hwaddr start, end;
978 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
980 if (!(existing->mr->subpage)) {
981 subpage = subpage_init(d->as, base);
982 subsection.address_space = d->as;
983 subsection.mr = &subpage->iomem;
984 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
985 phys_section_add(&d->map, &subsection));
986 } else {
987 subpage = container_of(existing->mr, subpage_t, iomem);
989 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
990 end = start + int128_get64(section->size) - 1;
991 subpage_register(subpage, start, end,
992 phys_section_add(&d->map, section));
996 static void register_multipage(AddressSpaceDispatch *d,
997 MemoryRegionSection *section)
999 hwaddr start_addr = section->offset_within_address_space;
1000 uint16_t section_index = phys_section_add(&d->map, section);
1001 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1002 TARGET_PAGE_BITS));
1004 assert(num_pages);
1005 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1008 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1010 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1011 AddressSpaceDispatch *d = as->next_dispatch;
1012 MemoryRegionSection now = *section, remain = *section;
1013 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1015 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1016 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1017 - now.offset_within_address_space;
1019 now.size = int128_min(int128_make64(left), now.size);
1020 register_subpage(d, &now);
1021 } else {
1022 now.size = int128_zero();
1024 while (int128_ne(remain.size, now.size)) {
1025 remain.size = int128_sub(remain.size, now.size);
1026 remain.offset_within_address_space += int128_get64(now.size);
1027 remain.offset_within_region += int128_get64(now.size);
1028 now = remain;
1029 if (int128_lt(remain.size, page_size)) {
1030 register_subpage(d, &now);
1031 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1032 now.size = page_size;
1033 register_subpage(d, &now);
1034 } else {
1035 now.size = int128_and(now.size, int128_neg(page_size));
1036 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; only does anything when KVM is
 * enabled.
 */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1047 void qemu_mutex_lock_ramlist(void)
1049 qemu_mutex_lock(&ram_list.mutex);
1052 void qemu_mutex_unlock_ramlist(void)
1054 qemu_mutex_unlock(&ram_list.mutex);
1057 #ifdef __linux__
1059 #include <sys/vfs.h>
1061 #define HUGETLBFS_MAGIC 0x958458f6
1063 static long gethugepagesize(const char *path, Error **errp)
1065 struct statfs fs;
1066 int ret;
1068 do {
1069 ret = statfs(path, &fs);
1070 } while (ret != 0 && errno == EINTR);
1072 if (ret != 0) {
1073 error_setg_errno(errp, errno, "failed to get page size of file %s",
1074 path);
1075 return 0;
1078 if (fs.f_type != HUGETLBFS_MAGIC)
1079 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1081 return fs.f_bsize;
1084 static void *file_ram_alloc(RAMBlock *block,
1085 ram_addr_t memory,
1086 const char *path,
1087 Error **errp)
1089 char *filename;
1090 char *sanitized_name;
1091 char *c;
1092 void *area = NULL;
1093 int fd;
1094 uint64_t hpagesize;
1095 Error *local_err = NULL;
1097 hpagesize = gethugepagesize(path, &local_err);
1098 if (local_err) {
1099 error_propagate(errp, local_err);
1100 goto error;
1102 block->mr->align = hpagesize;
1104 if (memory < hpagesize) {
1105 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1106 "or larger than huge page size 0x%" PRIx64,
1107 memory, hpagesize);
1108 goto error;
1111 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1112 error_setg(errp,
1113 "host lacks kvm mmu notifiers, -mem-path unsupported");
1114 goto error;
1117 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1118 sanitized_name = g_strdup(memory_region_name(block->mr));
1119 for (c = sanitized_name; *c != '\0'; c++) {
1120 if (*c == '/')
1121 *c = '_';
1124 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1125 sanitized_name);
1126 g_free(sanitized_name);
1128 fd = mkstemp(filename);
1129 if (fd < 0) {
1130 error_setg_errno(errp, errno,
1131 "unable to create backing store for hugepages");
1132 g_free(filename);
1133 goto error;
1135 unlink(filename);
1136 g_free(filename);
1138 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1141 * ftruncate is not supported by hugetlbfs in older
1142 * hosts, so don't bother bailing out on errors.
1143 * If anything goes wrong with it under other filesystems,
1144 * mmap will fail.
1146 if (ftruncate(fd, memory)) {
1147 perror("ftruncate");
1150 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1151 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1152 fd, 0);
1153 if (area == MAP_FAILED) {
1154 error_setg_errno(errp, errno,
1155 "unable to map backing store for hugepages");
1156 close(fd);
1157 goto error;
1160 if (mem_prealloc) {
1161 os_mem_prealloc(fd, area, memory);
1164 block->fd = fd;
1165 return area;
1167 error:
1168 if (mem_prealloc) {
1169 error_report("%s\n", error_get_pretty(*errp));
1170 exit(1);
1172 return NULL;
1174 #endif
1176 static ram_addr_t find_ram_offset(ram_addr_t size)
1178 RAMBlock *block, *next_block;
1179 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1181 assert(size != 0); /* it would hand out same offset multiple times */
1183 if (QTAILQ_EMPTY(&ram_list.blocks))
1184 return 0;
1186 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1187 ram_addr_t end, next = RAM_ADDR_MAX;
1189 end = block->offset + block->length;
1191 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1192 if (next_block->offset >= end) {
1193 next = MIN(next, next_block->offset);
1196 if (next - end >= size && next - end < mingap) {
1197 offset = end;
1198 mingap = next - end;
1202 if (offset == RAM_ADDR_MAX) {
1203 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1204 (uint64_t)size);
1205 abort();
1208 return offset;
1211 ram_addr_t last_ram_offset(void)
1213 RAMBlock *block;
1214 ram_addr_t last = 0;
1216 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1217 last = MAX(last, block->offset + block->length);
1219 return last;
1222 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1224 int ret;
1226 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1227 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1228 "dump-guest-core", true)) {
1229 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1230 if (ret) {
1231 perror("qemu_madvise");
1232 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1233 "but dump_guest_core=off specified\n");
1238 static RAMBlock *find_ram_block(ram_addr_t addr)
1240 RAMBlock *block;
1242 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1243 if (block->offset == addr) {
1244 return block;
1248 return NULL;
1251 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1253 RAMBlock *new_block = find_ram_block(addr);
1254 RAMBlock *block;
1256 assert(new_block);
1257 assert(!new_block->idstr[0]);
1259 if (dev) {
1260 char *id = qdev_get_dev_path(dev);
1261 if (id) {
1262 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1263 g_free(id);
1266 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1268 /* This assumes the iothread lock is taken here too. */
1269 qemu_mutex_lock_ramlist();
1270 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1271 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1272 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1273 new_block->idstr);
1274 abort();
1277 qemu_mutex_unlock_ramlist();
1280 void qemu_ram_unset_idstr(ram_addr_t addr)
1282 RAMBlock *block = find_ram_block(addr);
1284 if (block) {
1285 memset(block->idstr, 0, sizeof(block->idstr));
1289 static int memory_try_enable_merging(void *addr, size_t len)
1291 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1292 /* disabled by the user */
1293 return 0;
1296 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1299 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1301 RAMBlock *block;
1302 ram_addr_t old_ram_size, new_ram_size;
1304 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1306 /* This assumes the iothread lock is taken here too. */
1307 qemu_mutex_lock_ramlist();
1308 new_block->offset = find_ram_offset(new_block->length);
1310 if (!new_block->host) {
1311 if (xen_enabled()) {
1312 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1313 } else {
1314 new_block->host = phys_mem_alloc(new_block->length,
1315 &new_block->mr->align);
1316 if (!new_block->host) {
1317 error_setg_errno(errp, errno,
1318 "cannot set up guest memory '%s'",
1319 memory_region_name(new_block->mr));
1320 qemu_mutex_unlock_ramlist();
1321 return -1;
1323 memory_try_enable_merging(new_block->host, new_block->length);
1327 /* Keep the list sorted from biggest to smallest block. */
1328 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1329 if (block->length < new_block->length) {
1330 break;
1333 if (block) {
1334 QTAILQ_INSERT_BEFORE(block, new_block, next);
1335 } else {
1336 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1338 ram_list.mru_block = NULL;
1340 ram_list.version++;
1341 qemu_mutex_unlock_ramlist();
1343 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1345 if (new_ram_size > old_ram_size) {
1346 int i;
1347 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1348 ram_list.dirty_memory[i] =
1349 bitmap_zero_extend(ram_list.dirty_memory[i],
1350 old_ram_size, new_ram_size);
1353 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1355 qemu_ram_setup_dump(new_block->host, new_block->length);
1356 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1357 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1359 if (kvm_enabled()) {
1360 kvm_setup_guest_memory(new_block->host, new_block->length);
1363 return new_block->offset;
1366 #ifdef __linux__
1367 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1368 bool share, const char *mem_path,
1369 Error **errp)
1371 RAMBlock *new_block;
1372 ram_addr_t addr;
1373 Error *local_err = NULL;
1375 if (xen_enabled()) {
1376 error_setg(errp, "-mem-path not supported with Xen");
1377 return -1;
1380 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1382 * file_ram_alloc() needs to allocate just like
1383 * phys_mem_alloc, but we haven't bothered to provide
1384 * a hook there.
1386 error_setg(errp,
1387 "-mem-path not supported with this accelerator");
1388 return -1;
1391 size = TARGET_PAGE_ALIGN(size);
1392 new_block = g_malloc0(sizeof(*new_block));
1393 new_block->mr = mr;
1394 new_block->length = size;
1395 new_block->flags = share ? RAM_SHARED : 0;
1396 new_block->host = file_ram_alloc(new_block, size,
1397 mem_path, errp);
1398 if (!new_block->host) {
1399 g_free(new_block);
1400 return -1;
1403 addr = ram_block_add(new_block, &local_err);
1404 if (local_err) {
1405 g_free(new_block);
1406 error_propagate(errp, local_err);
1407 return -1;
1409 return addr;
1411 #endif
1413 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1414 MemoryRegion *mr, Error **errp)
1416 RAMBlock *new_block;
1417 ram_addr_t addr;
1418 Error *local_err = NULL;
1420 size = TARGET_PAGE_ALIGN(size);
1421 new_block = g_malloc0(sizeof(*new_block));
1422 new_block->mr = mr;
1423 new_block->length = size;
1424 new_block->fd = -1;
1425 new_block->host = host;
1426 if (host) {
1427 new_block->flags |= RAM_PREALLOC;
1429 addr = ram_block_add(new_block, &local_err);
1430 if (local_err) {
1431 g_free(new_block);
1432 error_propagate(errp, local_err);
1433 return -1;
1435 return addr;
1438 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1440 return qemu_ram_alloc_from_ptr(size, NULL, mr, errp);
1443 void qemu_ram_free_from_ptr(ram_addr_t addr)
1445 RAMBlock *block;
1447 /* This assumes the iothread lock is taken here too. */
1448 qemu_mutex_lock_ramlist();
1449 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1450 if (addr == block->offset) {
1451 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1452 ram_list.mru_block = NULL;
1453 ram_list.version++;
1454 g_free(block);
1455 break;
1458 qemu_mutex_unlock_ramlist();
1461 void qemu_ram_free(ram_addr_t addr)
1463 RAMBlock *block;
1465 /* This assumes the iothread lock is taken here too. */
1466 qemu_mutex_lock_ramlist();
1467 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1468 if (addr == block->offset) {
1469 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1470 ram_list.mru_block = NULL;
1471 ram_list.version++;
1472 if (block->flags & RAM_PREALLOC) {
1474 } else if (xen_enabled()) {
1475 xen_invalidate_map_cache_entry(block->host);
1476 #ifndef _WIN32
1477 } else if (block->fd >= 0) {
1478 munmap(block->host, block->length);
1479 close(block->fd);
1480 #endif
1481 } else {
1482 qemu_anon_ram_free(block->host, block->length);
1484 g_free(block);
1485 break;
1488 qemu_mutex_unlock_ramlist();
1492 #ifndef _WIN32
1493 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1495 RAMBlock *block;
1496 ram_addr_t offset;
1497 int flags;
1498 void *area, *vaddr;
1500 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1501 offset = addr - block->offset;
1502 if (offset < block->length) {
1503 vaddr = ramblock_ptr(block, offset);
1504 if (block->flags & RAM_PREALLOC) {
1506 } else if (xen_enabled()) {
1507 abort();
1508 } else {
1509 flags = MAP_FIXED;
1510 munmap(vaddr, length);
1511 if (block->fd >= 0) {
1512 flags |= (block->flags & RAM_SHARED ?
1513 MAP_SHARED : MAP_PRIVATE);
1514 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1515 flags, block->fd, offset);
1516 } else {
1518 * Remap needs to match alloc. Accelerators that
1519 * set phys_mem_alloc never remap. If they did,
1520 * we'd need a remap hook here.
1522 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1524 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1525 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1526 flags, -1, 0);
1528 if (area != vaddr) {
1529 fprintf(stderr, "Could not remap addr: "
1530 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1531 length, addr);
1532 exit(1);
1534 memory_try_enable_merging(vaddr, length);
1535 qemu_ram_setup_dump(vaddr, length);
1537 return;
1541 #endif /* !_WIN32 */
1543 int qemu_get_ram_fd(ram_addr_t addr)
1545 RAMBlock *block = qemu_get_ram_block(addr);
1547 return block->fd;
1550 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1552 RAMBlock *block = qemu_get_ram_block(addr);
1554 return ramblock_ptr(block, 0);
1557 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1558 With the exception of the softmmu code in this file, this should
1559 only be used for local memory (e.g. video ram) that the device owns,
1560 and knows it isn't going to access beyond the end of the block.
1562 It should not be used for general purpose DMA.
1563 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1565 void *qemu_get_ram_ptr(ram_addr_t addr)
1567 RAMBlock *block = qemu_get_ram_block(addr);
1569 if (xen_enabled()) {
1570 /* We need to check if the requested address is in the RAM
1571 * because we don't want to map the entire memory in QEMU.
1572 * In that case just map until the end of the page.
1574 if (block->offset == 0) {
1575 return xen_map_cache(addr, 0, 0);
1576 } else if (block->host == NULL) {
1577 block->host =
1578 xen_map_cache(block->offset, block->length, 1);
1581 return ramblock_ptr(block, addr - block->offset);
1584 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1585 * but takes a size argument */
1586 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1588 if (*size == 0) {
1589 return NULL;
1591 if (xen_enabled()) {
1592 return xen_map_cache(addr, *size, 1);
1593 } else {
1594 RAMBlock *block;
1596 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1597 if (addr - block->offset < block->length) {
1598 if (addr - block->offset + *size > block->length)
1599 *size = block->length - addr + block->offset;
1600 return ramblock_ptr(block, addr - block->offset);
1604 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1605 abort();
1609 /* Some of the softmmu routines need to translate from a host pointer
1610 (typically a TLB entry) back to a ram offset. */
1611 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1613 RAMBlock *block;
1614 uint8_t *host = ptr;
1616 if (xen_enabled()) {
1617 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1618 return qemu_get_ram_block(*ram_addr)->mr;
1621 block = ram_list.mru_block;
1622 if (block && block->host && host - block->host < block->length) {
1623 goto found;
1626 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1627 /* This case append when the block is not mapped. */
1628 if (block->host == NULL) {
1629 continue;
1631 if (host - block->host < block->length) {
1632 goto found;
1636 return NULL;
1638 found:
1639 *ram_addr = block->offset + (host - block->host);
1640 return block->mr;
1643 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1644 uint64_t val, unsigned size)
1646 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1647 tb_invalidate_phys_page_fast(ram_addr, size);
1649 switch (size) {
1650 case 1:
1651 stb_p(qemu_get_ram_ptr(ram_addr), val);
1652 break;
1653 case 2:
1654 stw_p(qemu_get_ram_ptr(ram_addr), val);
1655 break;
1656 case 4:
1657 stl_p(qemu_get_ram_ptr(ram_addr), val);
1658 break;
1659 default:
1660 abort();
1662 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1663 /* we remove the notdirty callback only if the code has been
1664 flushed */
1665 if (!cpu_physical_memory_is_clean(ram_addr)) {
1666 CPUArchState *env = current_cpu->env_ptr;
1667 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1671 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1672 unsigned size, bool is_write)
1674 return is_write;
1677 static const MemoryRegionOps notdirty_mem_ops = {
1678 .write = notdirty_mem_write,
1679 .valid.accepts = notdirty_mem_accepts,
1680 .endianness = DEVICE_NATIVE_ENDIAN,
1683 /* Generate a debug exception if a watchpoint has been hit. */
1684 static void check_watchpoint(int offset, int len, int flags)
1686 CPUState *cpu = current_cpu;
1687 CPUArchState *env = cpu->env_ptr;
1688 target_ulong pc, cs_base;
1689 target_ulong vaddr;
1690 CPUWatchpoint *wp;
1691 int cpu_flags;
1693 if (cpu->watchpoint_hit) {
1694 /* We re-entered the check after replacing the TB. Now raise
1695 * the debug interrupt so that is will trigger after the
1696 * current instruction. */
1697 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1698 return;
1700 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1701 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1702 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1703 && (wp->flags & flags)) {
1704 if (flags == BP_MEM_READ) {
1705 wp->flags |= BP_WATCHPOINT_HIT_READ;
1706 } else {
1707 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1709 wp->hitaddr = vaddr;
1710 if (!cpu->watchpoint_hit) {
1711 cpu->watchpoint_hit = wp;
1712 tb_check_watchpoint(cpu);
1713 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1714 cpu->exception_index = EXCP_DEBUG;
1715 cpu_loop_exit(cpu);
1716 } else {
1717 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1718 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1719 cpu_resume_from_signal(cpu, NULL);
1722 } else {
1723 wp->flags &= ~BP_WATCHPOINT_HIT;
1728 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1729 so these check for a hit then pass through to the normal out-of-line
1730 phys routines. */
1731 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1732 unsigned size)
1734 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1735 switch (size) {
1736 case 1: return ldub_phys(&address_space_memory, addr);
1737 case 2: return lduw_phys(&address_space_memory, addr);
1738 case 4: return ldl_phys(&address_space_memory, addr);
1739 default: abort();
1743 static void watch_mem_write(void *opaque, hwaddr addr,
1744 uint64_t val, unsigned size)
1746 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1747 switch (size) {
1748 case 1:
1749 stb_phys(&address_space_memory, addr, val);
1750 break;
1751 case 2:
1752 stw_phys(&address_space_memory, addr, val);
1753 break;
1754 case 4:
1755 stl_phys(&address_space_memory, addr, val);
1756 break;
1757 default: abort();
1761 static const MemoryRegionOps watch_mem_ops = {
1762 .read = watch_mem_read,
1763 .write = watch_mem_write,
1764 .endianness = DEVICE_NATIVE_ENDIAN,
1767 static uint64_t subpage_read(void *opaque, hwaddr addr,
1768 unsigned len)
1770 subpage_t *subpage = opaque;
1771 uint8_t buf[8];
1773 #if defined(DEBUG_SUBPAGE)
1774 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1775 subpage, len, addr);
1776 #endif
1777 address_space_read(subpage->as, addr + subpage->base, buf, len);
1778 switch (len) {
1779 case 1:
1780 return ldub_p(buf);
1781 case 2:
1782 return lduw_p(buf);
1783 case 4:
1784 return ldl_p(buf);
1785 case 8:
1786 return ldq_p(buf);
1787 default:
1788 abort();
1792 static void subpage_write(void *opaque, hwaddr addr,
1793 uint64_t value, unsigned len)
1795 subpage_t *subpage = opaque;
1796 uint8_t buf[8];
1798 #if defined(DEBUG_SUBPAGE)
1799 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1800 " value %"PRIx64"\n",
1801 __func__, subpage, len, addr, value);
1802 #endif
1803 switch (len) {
1804 case 1:
1805 stb_p(buf, value);
1806 break;
1807 case 2:
1808 stw_p(buf, value);
1809 break;
1810 case 4:
1811 stl_p(buf, value);
1812 break;
1813 case 8:
1814 stq_p(buf, value);
1815 break;
1816 default:
1817 abort();
1819 address_space_write(subpage->as, addr + subpage->base, buf, len);
1822 static bool subpage_accepts(void *opaque, hwaddr addr,
1823 unsigned len, bool is_write)
1825 subpage_t *subpage = opaque;
1826 #if defined(DEBUG_SUBPAGE)
1827 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1828 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1829 #endif
1831 return address_space_access_valid(subpage->as, addr + subpage->base,
1832 len, is_write);
1835 static const MemoryRegionOps subpage_ops = {
1836 .read = subpage_read,
1837 .write = subpage_write,
1838 .impl.min_access_size = 1,
1839 .impl.max_access_size = 8,
1840 .valid.min_access_size = 1,
1841 .valid.max_access_size = 8,
1842 .valid.accepts = subpage_accepts,
1843 .endianness = DEVICE_NATIVE_ENDIAN,
1846 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1847 uint16_t section)
1849 int idx, eidx;
1851 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1852 return -1;
1853 idx = SUBPAGE_IDX(start);
1854 eidx = SUBPAGE_IDX(end);
1855 #if defined(DEBUG_SUBPAGE)
1856 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1857 __func__, mmio, start, end, idx, eidx, section);
1858 #endif
1859 for (; idx <= eidx; idx++) {
1860 mmio->sub_section[idx] = section;
1863 return 0;
1866 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1868 subpage_t *mmio;
1870 mmio = g_malloc0(sizeof(subpage_t));
1872 mmio->as = as;
1873 mmio->base = base;
1874 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1875 NULL, TARGET_PAGE_SIZE);
1876 mmio->iomem.subpage = true;
1877 #if defined(DEBUG_SUBPAGE)
1878 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1879 mmio, base, TARGET_PAGE_SIZE);
1880 #endif
1881 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1883 return mmio;
1886 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1887 MemoryRegion *mr)
1889 assert(as);
1890 MemoryRegionSection section = {
1891 .address_space = as,
1892 .mr = mr,
1893 .offset_within_address_space = 0,
1894 .offset_within_region = 0,
1895 .size = int128_2_64(),
1898 return phys_section_add(map, &section);
1901 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1903 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1906 static void io_mem_init(void)
1908 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1909 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1910 NULL, UINT64_MAX);
1911 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1912 NULL, UINT64_MAX);
1913 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1914 NULL, UINT64_MAX);
1917 static void mem_begin(MemoryListener *listener)
1919 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1920 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1921 uint16_t n;
1923 n = dummy_section(&d->map, as, &io_mem_unassigned);
1924 assert(n == PHYS_SECTION_UNASSIGNED);
1925 n = dummy_section(&d->map, as, &io_mem_notdirty);
1926 assert(n == PHYS_SECTION_NOTDIRTY);
1927 n = dummy_section(&d->map, as, &io_mem_rom);
1928 assert(n == PHYS_SECTION_ROM);
1929 n = dummy_section(&d->map, as, &io_mem_watch);
1930 assert(n == PHYS_SECTION_WATCH);
1932 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1933 d->as = as;
1934 as->next_dispatch = d;
1937 static void mem_commit(MemoryListener *listener)
1939 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1940 AddressSpaceDispatch *cur = as->dispatch;
1941 AddressSpaceDispatch *next = as->next_dispatch;
1943 phys_page_compact_all(next, next->map.nodes_nb);
1945 as->dispatch = next;
1947 if (cur) {
1948 phys_sections_free(&cur->map);
1949 g_free(cur);
1953 static void tcg_commit(MemoryListener *listener)
1955 CPUState *cpu;
1957 /* since each CPU stores ram addresses in its TLB cache, we must
1958 reset the modified entries */
1959 /* XXX: slow ! */
1960 CPU_FOREACH(cpu) {
1961 /* FIXME: Disentangle the cpu.h circular files deps so we can
1962 directly get the right CPU from listener. */
1963 if (cpu->tcg_as_listener != listener) {
1964 continue;
1966 tlb_flush(cpu, 1);
1970 static void core_log_global_start(MemoryListener *listener)
1972 cpu_physical_memory_set_dirty_tracking(true);
1975 static void core_log_global_stop(MemoryListener *listener)
1977 cpu_physical_memory_set_dirty_tracking(false);
1980 static MemoryListener core_memory_listener = {
1981 .log_global_start = core_log_global_start,
1982 .log_global_stop = core_log_global_stop,
1983 .priority = 1,
1986 void address_space_init_dispatch(AddressSpace *as)
1988 as->dispatch = NULL;
1989 as->dispatch_listener = (MemoryListener) {
1990 .begin = mem_begin,
1991 .commit = mem_commit,
1992 .region_add = mem_add,
1993 .region_nop = mem_add,
1994 .priority = 0,
1996 memory_listener_register(&as->dispatch_listener, as);
1999 void address_space_destroy_dispatch(AddressSpace *as)
2001 AddressSpaceDispatch *d = as->dispatch;
2003 memory_listener_unregister(&as->dispatch_listener);
2004 g_free(d);
2005 as->dispatch = NULL;
2008 static void memory_map_init(void)
2010 system_memory = g_malloc(sizeof(*system_memory));
2012 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2013 address_space_init(&address_space_memory, system_memory, "memory");
2015 system_io = g_malloc(sizeof(*system_io));
2016 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2017 65536);
2018 address_space_init(&address_space_io, system_io, "I/O");
2020 memory_listener_register(&core_memory_listener, &address_space_memory);
2023 MemoryRegion *get_system_memory(void)
2025 return system_memory;
2028 MemoryRegion *get_system_io(void)
2030 return system_io;
2033 #endif /* !defined(CONFIG_USER_ONLY) */
2035 /* physical memory access (slow version, mainly for debug) */
2036 #if defined(CONFIG_USER_ONLY)
2037 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2038 uint8_t *buf, int len, int is_write)
2040 int l, flags;
2041 target_ulong page;
2042 void * p;
2044 while (len > 0) {
2045 page = addr & TARGET_PAGE_MASK;
2046 l = (page + TARGET_PAGE_SIZE) - addr;
2047 if (l > len)
2048 l = len;
2049 flags = page_get_flags(page);
2050 if (!(flags & PAGE_VALID))
2051 return -1;
2052 if (is_write) {
2053 if (!(flags & PAGE_WRITE))
2054 return -1;
2055 /* XXX: this code should not depend on lock_user */
2056 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2057 return -1;
2058 memcpy(p, buf, l);
2059 unlock_user(p, addr, l);
2060 } else {
2061 if (!(flags & PAGE_READ))
2062 return -1;
2063 /* XXX: this code should not depend on lock_user */
2064 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2065 return -1;
2066 memcpy(buf, p, l);
2067 unlock_user(p, addr, 0);
2069 len -= l;
2070 buf += l;
2071 addr += l;
2073 return 0;
2076 #else
2078 static void invalidate_and_set_dirty(hwaddr addr,
2079 hwaddr length)
2081 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2082 tb_invalidate_phys_range(addr, addr + length, 0);
2083 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2085 xen_modified_memory(addr, length);
2088 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2090 unsigned access_size_max = mr->ops->valid.max_access_size;
2092 /* Regions are assumed to support 1-4 byte accesses unless
2093 otherwise specified. */
2094 if (access_size_max == 0) {
2095 access_size_max = 4;
2098 /* Bound the maximum access by the alignment of the address. */
2099 if (!mr->ops->impl.unaligned) {
2100 unsigned align_size_max = addr & -addr;
2101 if (align_size_max != 0 && align_size_max < access_size_max) {
2102 access_size_max = align_size_max;
2106 /* Don't attempt accesses larger than the maximum. */
2107 if (l > access_size_max) {
2108 l = access_size_max;
2110 if (l & (l - 1)) {
2111 l = 1 << (qemu_fls(l) - 1);
2114 return l;
2117 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2118 int len, bool is_write)
2120 hwaddr l;
2121 uint8_t *ptr;
2122 uint64_t val;
2123 hwaddr addr1;
2124 MemoryRegion *mr;
2125 bool error = false;
2127 while (len > 0) {
2128 l = len;
2129 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2131 if (is_write) {
2132 if (!memory_access_is_direct(mr, is_write)) {
2133 l = memory_access_size(mr, l, addr1);
2134 /* XXX: could force current_cpu to NULL to avoid
2135 potential bugs */
2136 switch (l) {
2137 case 8:
2138 /* 64 bit write access */
2139 val = ldq_p(buf);
2140 error |= io_mem_write(mr, addr1, val, 8);
2141 break;
2142 case 4:
2143 /* 32 bit write access */
2144 val = ldl_p(buf);
2145 error |= io_mem_write(mr, addr1, val, 4);
2146 break;
2147 case 2:
2148 /* 16 bit write access */
2149 val = lduw_p(buf);
2150 error |= io_mem_write(mr, addr1, val, 2);
2151 break;
2152 case 1:
2153 /* 8 bit write access */
2154 val = ldub_p(buf);
2155 error |= io_mem_write(mr, addr1, val, 1);
2156 break;
2157 default:
2158 abort();
2160 } else {
2161 addr1 += memory_region_get_ram_addr(mr);
2162 /* RAM case */
2163 ptr = qemu_get_ram_ptr(addr1);
2164 memcpy(ptr, buf, l);
2165 invalidate_and_set_dirty(addr1, l);
2167 } else {
2168 if (!memory_access_is_direct(mr, is_write)) {
2169 /* I/O case */
2170 l = memory_access_size(mr, l, addr1);
2171 switch (l) {
2172 case 8:
2173 /* 64 bit read access */
2174 error |= io_mem_read(mr, addr1, &val, 8);
2175 stq_p(buf, val);
2176 break;
2177 case 4:
2178 /* 32 bit read access */
2179 error |= io_mem_read(mr, addr1, &val, 4);
2180 stl_p(buf, val);
2181 break;
2182 case 2:
2183 /* 16 bit read access */
2184 error |= io_mem_read(mr, addr1, &val, 2);
2185 stw_p(buf, val);
2186 break;
2187 case 1:
2188 /* 8 bit read access */
2189 error |= io_mem_read(mr, addr1, &val, 1);
2190 stb_p(buf, val);
2191 break;
2192 default:
2193 abort();
2195 } else {
2196 /* RAM case */
2197 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2198 memcpy(buf, ptr, l);
2201 len -= l;
2202 buf += l;
2203 addr += l;
2206 return error;
2209 bool address_space_write(AddressSpace *as, hwaddr addr,
2210 const uint8_t *buf, int len)
2212 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2215 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2217 return address_space_rw(as, addr, buf, len, false);
2221 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2222 int len, int is_write)
2224 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2232 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2233 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2235 hwaddr l;
2236 uint8_t *ptr;
2237 hwaddr addr1;
2238 MemoryRegion *mr;
2240 while (len > 0) {
2241 l = len;
2242 mr = address_space_translate(as, addr, &addr1, &l, true);
2244 if (!(memory_region_is_ram(mr) ||
2245 memory_region_is_romd(mr))) {
2246 /* do nothing */
2247 } else {
2248 addr1 += memory_region_get_ram_addr(mr);
2249 /* ROM/RAM case */
2250 ptr = qemu_get_ram_ptr(addr1);
2251 switch (type) {
2252 case WRITE_DATA:
2253 memcpy(ptr, buf, l);
2254 invalidate_and_set_dirty(addr1, l);
2255 break;
2256 case FLUSH_CACHE:
2257 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2258 break;
2261 len -= l;
2262 buf += l;
2263 addr += l;
2267 /* used for ROM loading : can write in RAM and ROM */
2268 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2269 const uint8_t *buf, int len)
2271 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2274 void cpu_flush_icache_range(hwaddr start, int len)
2277 * This function should do the same thing as an icache flush that was
2278 * triggered from within the guest. For TCG we are always cache coherent,
2279 * so there is no need to flush anything. For KVM / Xen we need to flush
2280 * the host's instruction cache at least.
2282 if (tcg_enabled()) {
2283 return;
2286 cpu_physical_memory_write_rom_internal(&address_space_memory,
2287 start, NULL, len, FLUSH_CACHE);
2290 typedef struct {
2291 MemoryRegion *mr;
2292 void *buffer;
2293 hwaddr addr;
2294 hwaddr len;
2295 } BounceBuffer;
2297 static BounceBuffer bounce;
2299 typedef struct MapClient {
2300 void *opaque;
2301 void (*callback)(void *opaque);
2302 QLIST_ENTRY(MapClient) link;
2303 } MapClient;
2305 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2306 = QLIST_HEAD_INITIALIZER(map_client_list);
2308 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2310 MapClient *client = g_malloc(sizeof(*client));
2312 client->opaque = opaque;
2313 client->callback = callback;
2314 QLIST_INSERT_HEAD(&map_client_list, client, link);
2315 return client;
2318 static void cpu_unregister_map_client(void *_client)
2320 MapClient *client = (MapClient *)_client;
2322 QLIST_REMOVE(client, link);
2323 g_free(client);
2326 static void cpu_notify_map_clients(void)
2328 MapClient *client;
2330 while (!QLIST_EMPTY(&map_client_list)) {
2331 client = QLIST_FIRST(&map_client_list);
2332 client->callback(client->opaque);
2333 cpu_unregister_map_client(client);
2337 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2339 MemoryRegion *mr;
2340 hwaddr l, xlat;
2342 while (len > 0) {
2343 l = len;
2344 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2345 if (!memory_access_is_direct(mr, is_write)) {
2346 l = memory_access_size(mr, l, addr);
2347 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2348 return false;
2352 len -= l;
2353 addr += l;
2355 return true;
2358 /* Map a physical memory region into a host virtual address.
2359 * May map a subset of the requested range, given by and returned in *plen.
2360 * May return NULL if resources needed to perform the mapping are exhausted.
2361 * Use only for reads OR writes - not for read-modify-write operations.
2362 * Use cpu_register_map_client() to know when retrying the map operation is
2363 * likely to succeed.
2365 void *address_space_map(AddressSpace *as,
2366 hwaddr addr,
2367 hwaddr *plen,
2368 bool is_write)
2370 hwaddr len = *plen;
2371 hwaddr done = 0;
2372 hwaddr l, xlat, base;
2373 MemoryRegion *mr, *this_mr;
2374 ram_addr_t raddr;
2376 if (len == 0) {
2377 return NULL;
2380 l = len;
2381 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2382 if (!memory_access_is_direct(mr, is_write)) {
2383 if (bounce.buffer) {
2384 return NULL;
2386 /* Avoid unbounded allocations */
2387 l = MIN(l, TARGET_PAGE_SIZE);
2388 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2389 bounce.addr = addr;
2390 bounce.len = l;
2392 memory_region_ref(mr);
2393 bounce.mr = mr;
2394 if (!is_write) {
2395 address_space_read(as, addr, bounce.buffer, l);
2398 *plen = l;
2399 return bounce.buffer;
2402 base = xlat;
2403 raddr = memory_region_get_ram_addr(mr);
2405 for (;;) {
2406 len -= l;
2407 addr += l;
2408 done += l;
2409 if (len == 0) {
2410 break;
2413 l = len;
2414 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2415 if (this_mr != mr || xlat != base + done) {
2416 break;
2420 memory_region_ref(mr);
2421 *plen = done;
2422 return qemu_ram_ptr_length(raddr + base, plen);
2425 /* Unmaps a memory region previously mapped by address_space_map().
2426 * Will also mark the memory as dirty if is_write == 1. access_len gives
2427 * the amount of memory that was actually read or written by the caller.
2429 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2430 int is_write, hwaddr access_len)
2432 if (buffer != bounce.buffer) {
2433 MemoryRegion *mr;
2434 ram_addr_t addr1;
2436 mr = qemu_ram_addr_from_host(buffer, &addr1);
2437 assert(mr != NULL);
2438 if (is_write) {
2439 invalidate_and_set_dirty(addr1, access_len);
2441 if (xen_enabled()) {
2442 xen_invalidate_map_cache_entry(buffer);
2444 memory_region_unref(mr);
2445 return;
2447 if (is_write) {
2448 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2450 qemu_vfree(bounce.buffer);
2451 bounce.buffer = NULL;
2452 memory_region_unref(bounce.mr);
2453 cpu_notify_map_clients();
2456 void *cpu_physical_memory_map(hwaddr addr,
2457 hwaddr *plen,
2458 int is_write)
2460 return address_space_map(&address_space_memory, addr, plen, is_write);
2463 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2464 int is_write, hwaddr access_len)
2466 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2469 /* warning: addr must be aligned */
2470 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2471 enum device_endian endian)
2473 uint8_t *ptr;
2474 uint64_t val;
2475 MemoryRegion *mr;
2476 hwaddr l = 4;
2477 hwaddr addr1;
2479 mr = address_space_translate(as, addr, &addr1, &l, false);
2480 if (l < 4 || !memory_access_is_direct(mr, false)) {
2481 /* I/O case */
2482 io_mem_read(mr, addr1, &val, 4);
2483 #if defined(TARGET_WORDS_BIGENDIAN)
2484 if (endian == DEVICE_LITTLE_ENDIAN) {
2485 val = bswap32(val);
2487 #else
2488 if (endian == DEVICE_BIG_ENDIAN) {
2489 val = bswap32(val);
2491 #endif
2492 } else {
2493 /* RAM case */
2494 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2495 & TARGET_PAGE_MASK)
2496 + addr1);
2497 switch (endian) {
2498 case DEVICE_LITTLE_ENDIAN:
2499 val = ldl_le_p(ptr);
2500 break;
2501 case DEVICE_BIG_ENDIAN:
2502 val = ldl_be_p(ptr);
2503 break;
2504 default:
2505 val = ldl_p(ptr);
2506 break;
2509 return val;
2512 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2514 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2517 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2519 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2522 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2524 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2527 /* warning: addr must be aligned */
2528 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2529 enum device_endian endian)
2531 uint8_t *ptr;
2532 uint64_t val;
2533 MemoryRegion *mr;
2534 hwaddr l = 8;
2535 hwaddr addr1;
2537 mr = address_space_translate(as, addr, &addr1, &l,
2538 false);
2539 if (l < 8 || !memory_access_is_direct(mr, false)) {
2540 /* I/O case */
2541 io_mem_read(mr, addr1, &val, 8);
2542 #if defined(TARGET_WORDS_BIGENDIAN)
2543 if (endian == DEVICE_LITTLE_ENDIAN) {
2544 val = bswap64(val);
2546 #else
2547 if (endian == DEVICE_BIG_ENDIAN) {
2548 val = bswap64(val);
2550 #endif
2551 } else {
2552 /* RAM case */
2553 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2554 & TARGET_PAGE_MASK)
2555 + addr1);
2556 switch (endian) {
2557 case DEVICE_LITTLE_ENDIAN:
2558 val = ldq_le_p(ptr);
2559 break;
2560 case DEVICE_BIG_ENDIAN:
2561 val = ldq_be_p(ptr);
2562 break;
2563 default:
2564 val = ldq_p(ptr);
2565 break;
2568 return val;
2571 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2573 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2576 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2578 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2581 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2583 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2586 /* XXX: optimize */
2587 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2589 uint8_t val;
2590 address_space_rw(as, addr, &val, 1, 0);
2591 return val;
2594 /* warning: addr must be aligned */
2595 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2596 enum device_endian endian)
2598 uint8_t *ptr;
2599 uint64_t val;
2600 MemoryRegion *mr;
2601 hwaddr l = 2;
2602 hwaddr addr1;
2604 mr = address_space_translate(as, addr, &addr1, &l,
2605 false);
2606 if (l < 2 || !memory_access_is_direct(mr, false)) {
2607 /* I/O case */
2608 io_mem_read(mr, addr1, &val, 2);
2609 #if defined(TARGET_WORDS_BIGENDIAN)
2610 if (endian == DEVICE_LITTLE_ENDIAN) {
2611 val = bswap16(val);
2613 #else
2614 if (endian == DEVICE_BIG_ENDIAN) {
2615 val = bswap16(val);
2617 #endif
2618 } else {
2619 /* RAM case */
2620 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2621 & TARGET_PAGE_MASK)
2622 + addr1);
2623 switch (endian) {
2624 case DEVICE_LITTLE_ENDIAN:
2625 val = lduw_le_p(ptr);
2626 break;
2627 case DEVICE_BIG_ENDIAN:
2628 val = lduw_be_p(ptr);
2629 break;
2630 default:
2631 val = lduw_p(ptr);
2632 break;
2635 return val;
2638 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2640 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2643 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2645 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2648 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2650 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2653 /* warning: addr must be aligned. The ram page is not masked as dirty
2654 and the code inside is not invalidated. It is useful if the dirty
2655 bits are used to track modified PTEs */
2656 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2658 uint8_t *ptr;
2659 MemoryRegion *mr;
2660 hwaddr l = 4;
2661 hwaddr addr1;
2663 mr = address_space_translate(as, addr, &addr1, &l,
2664 true);
2665 if (l < 4 || !memory_access_is_direct(mr, true)) {
2666 io_mem_write(mr, addr1, val, 4);
2667 } else {
2668 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2669 ptr = qemu_get_ram_ptr(addr1);
2670 stl_p(ptr, val);
2672 if (unlikely(in_migration)) {
2673 if (cpu_physical_memory_is_clean(addr1)) {
2674 /* invalidate code */
2675 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2676 /* set dirty bit */
2677 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2683 /* warning: addr must be aligned */
2684 static inline void stl_phys_internal(AddressSpace *as,
2685 hwaddr addr, uint32_t val,
2686 enum device_endian endian)
2688 uint8_t *ptr;
2689 MemoryRegion *mr;
2690 hwaddr l = 4;
2691 hwaddr addr1;
2693 mr = address_space_translate(as, addr, &addr1, &l,
2694 true);
2695 if (l < 4 || !memory_access_is_direct(mr, true)) {
2696 #if defined(TARGET_WORDS_BIGENDIAN)
2697 if (endian == DEVICE_LITTLE_ENDIAN) {
2698 val = bswap32(val);
2700 #else
2701 if (endian == DEVICE_BIG_ENDIAN) {
2702 val = bswap32(val);
2704 #endif
2705 io_mem_write(mr, addr1, val, 4);
2706 } else {
2707 /* RAM case */
2708 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2709 ptr = qemu_get_ram_ptr(addr1);
2710 switch (endian) {
2711 case DEVICE_LITTLE_ENDIAN:
2712 stl_le_p(ptr, val);
2713 break;
2714 case DEVICE_BIG_ENDIAN:
2715 stl_be_p(ptr, val);
2716 break;
2717 default:
2718 stl_p(ptr, val);
2719 break;
2721 invalidate_and_set_dirty(addr1, 4);
2725 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2727 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2730 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2732 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2735 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2737 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2740 /* XXX: optimize */
2741 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2743 uint8_t v = val;
2744 address_space_rw(as, addr, &v, 1, 1);
2747 /* warning: addr must be aligned */
2748 static inline void stw_phys_internal(AddressSpace *as,
2749 hwaddr addr, uint32_t val,
2750 enum device_endian endian)
2752 uint8_t *ptr;
2753 MemoryRegion *mr;
2754 hwaddr l = 2;
2755 hwaddr addr1;
2757 mr = address_space_translate(as, addr, &addr1, &l, true);
2758 if (l < 2 || !memory_access_is_direct(mr, true)) {
2759 #if defined(TARGET_WORDS_BIGENDIAN)
2760 if (endian == DEVICE_LITTLE_ENDIAN) {
2761 val = bswap16(val);
2763 #else
2764 if (endian == DEVICE_BIG_ENDIAN) {
2765 val = bswap16(val);
2767 #endif
2768 io_mem_write(mr, addr1, val, 2);
2769 } else {
2770 /* RAM case */
2771 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2772 ptr = qemu_get_ram_ptr(addr1);
2773 switch (endian) {
2774 case DEVICE_LITTLE_ENDIAN:
2775 stw_le_p(ptr, val);
2776 break;
2777 case DEVICE_BIG_ENDIAN:
2778 stw_be_p(ptr, val);
2779 break;
2780 default:
2781 stw_p(ptr, val);
2782 break;
2784 invalidate_and_set_dirty(addr1, 2);
2788 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2790 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2793 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2795 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2798 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2800 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2803 /* XXX: optimize */
2804 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2806 val = tswap64(val);
2807 address_space_rw(as, addr, (void *) &val, 8, 1);
2810 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2812 val = cpu_to_le64(val);
2813 address_space_rw(as, addr, (void *) &val, 8, 1);
2816 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2818 val = cpu_to_be64(val);
2819 address_space_rw(as, addr, (void *) &val, 8, 1);
2822 /* virtual memory access for debug (includes writing to ROM) */
2823 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2824 uint8_t *buf, int len, int is_write)
2826 int l;
2827 hwaddr phys_addr;
2828 target_ulong page;
2830 while (len > 0) {
2831 page = addr & TARGET_PAGE_MASK;
2832 phys_addr = cpu_get_phys_page_debug(cpu, page);
2833 /* if no physical page mapped, return an error */
2834 if (phys_addr == -1)
2835 return -1;
2836 l = (page + TARGET_PAGE_SIZE) - addr;
2837 if (l > len)
2838 l = len;
2839 phys_addr += (addr & ~TARGET_PAGE_MASK);
2840 if (is_write) {
2841 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2842 } else {
2843 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2845 len -= l;
2846 buf += l;
2847 addr += l;
2849 return 0;
2851 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out
 * if it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
2867 #ifndef CONFIG_USER_ONLY
2868 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2870 MemoryRegion*mr;
2871 hwaddr l = 1;
2873 mr = address_space_translate(&address_space_memory,
2874 phys_addr, &phys_addr, &l, false);
2876 return !(memory_region_is_ram(mr) ||
2877 memory_region_is_romd(mr));
2880 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2882 RAMBlock *block;
2884 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2885 func(block->host, block->offset, block->length, opaque);
2888 #endif