vmxnet3: add bootindex to qom property
[qemu-kvm.git] / exec.c
blob759055d0e38a160e3eae3ff8a25564d5d380eeaf
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
78 #endif
80 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
81 /* current CPU in the current thread. It is only valid inside
82 cpu_exec() */
83 DEFINE_TLS(CPUState *, current_cpu);
84 /* 0 = Do not count executed instructions.
85 1 = Precise instruction counting.
86 2 = Adaptive rate instruction counting. */
87 int use_icount;
89 #if !defined(CONFIG_USER_ONLY)
91 typedef struct PhysPageEntry PhysPageEntry;
struct PhysPageEntry {
    /* Number of levels to skip to reach the next node (in units of
     * L2_SIZE).  Zero marks a leaf entry.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Non-leaf (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
100 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
102 /* Size of the L2 (and L3, etc) page tables. */
103 #define ADDR_SPACE_BITS 64
105 #define P_L2_BITS 9
106 #define P_L2_SIZE (1 << P_L2_BITS)
108 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
110 typedef PhysPageEntry Node[P_L2_SIZE];
112 typedef struct PhysPageMap {
113 unsigned sections_nb;
114 unsigned sections_nb_alloc;
115 unsigned nodes_nb;
116 unsigned nodes_nb_alloc;
117 Node *nodes;
118 MemoryRegionSection *sections;
119 } PhysPageMap;
121 struct AddressSpaceDispatch {
122 /* This is a multi-level map on the physical address space.
123 * The bottom level has pointers to MemoryRegionSections.
125 PhysPageEntry phys_map;
126 PhysPageMap map;
127 AddressSpace *as;
130 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
131 typedef struct subpage_t {
132 MemoryRegion iomem;
133 AddressSpace *as;
134 hwaddr base;
135 uint16_t sub_section[TARGET_PAGE_SIZE];
136 } subpage_t;
138 #define PHYS_SECTION_UNASSIGNED 0
139 #define PHYS_SECTION_NOTDIRTY 1
140 #define PHYS_SECTION_ROM 2
141 #define PHYS_SECTION_WATCH 3
143 static void io_mem_init(void);
144 static void memory_map_init(void);
145 static void tcg_commit(MemoryListener *listener);
147 static MemoryRegion io_mem_watch;
148 #endif
150 #if !defined(CONFIG_USER_ONLY)
152 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
154 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
155 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
156 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
157 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
161 static uint32_t phys_map_node_alloc(PhysPageMap *map)
163 unsigned i;
164 uint32_t ret;
166 ret = map->nodes_nb++;
167 assert(ret != PHYS_MAP_NODE_NIL);
168 assert(ret != map->nodes_nb_alloc);
169 for (i = 0; i < P_L2_SIZE; ++i) {
170 map->nodes[ret][i].skip = 1;
171 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
173 return ret;
176 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
177 hwaddr *index, hwaddr *nb, uint16_t leaf,
178 int level)
180 PhysPageEntry *p;
181 int i;
182 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
184 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
185 lp->ptr = phys_map_node_alloc(map);
186 p = map->nodes[lp->ptr];
187 if (level == 0) {
188 for (i = 0; i < P_L2_SIZE; i++) {
189 p[i].skip = 0;
190 p[i].ptr = PHYS_SECTION_UNASSIGNED;
193 } else {
194 p = map->nodes[lp->ptr];
196 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
198 while (*nb && lp < &p[P_L2_SIZE]) {
199 if ((*index & (step - 1)) == 0 && *nb >= step) {
200 lp->skip = 0;
201 lp->ptr = leaf;
202 *index += step;
203 *nb -= step;
204 } else {
205 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
207 ++lp;
211 static void phys_page_set(AddressSpaceDispatch *d,
212 hwaddr index, hwaddr nb,
213 uint16_t leaf)
215 /* Wildly overreserve - it doesn't matter much. */
216 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
218 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
221 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
222 * and update our entry so we can skip it and go directly to the destination.
224 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
226 unsigned valid_ptr = P_L2_SIZE;
227 int valid = 0;
228 PhysPageEntry *p;
229 int i;
231 if (lp->ptr == PHYS_MAP_NODE_NIL) {
232 return;
235 p = nodes[lp->ptr];
236 for (i = 0; i < P_L2_SIZE; i++) {
237 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
238 continue;
241 valid_ptr = i;
242 valid++;
243 if (p[i].skip) {
244 phys_page_compact(&p[i], nodes, compacted);
248 /* We can only compress if there's only one child. */
249 if (valid != 1) {
250 return;
253 assert(valid_ptr < P_L2_SIZE);
255 /* Don't compress if it won't fit in the # of bits we have. */
256 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
257 return;
260 lp->ptr = p[valid_ptr].ptr;
261 if (!p[valid_ptr].skip) {
262 /* If our only child is a leaf, make this a leaf. */
263 /* By design, we should have made this node a leaf to begin with so we
264 * should never reach here.
265 * But since it's so simple to handle this, let's do it just in case we
266 * change this rule.
268 lp->skip = 0;
269 } else {
270 lp->skip += p[valid_ptr].skip;
274 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
276 DECLARE_BITMAP(compacted, nodes_nb);
278 if (d->phys_map.skip) {
279 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
283 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
284 Node *nodes, MemoryRegionSection *sections)
286 PhysPageEntry *p;
287 hwaddr index = addr >> TARGET_PAGE_BITS;
288 int i;
290 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
291 if (lp.ptr == PHYS_MAP_NODE_NIL) {
292 return &sections[PHYS_SECTION_UNASSIGNED];
294 p = nodes[lp.ptr];
295 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
298 if (sections[lp.ptr].size.hi ||
299 range_covers_byte(sections[lp.ptr].offset_within_address_space,
300 sections[lp.ptr].size.lo, addr)) {
301 return &sections[lp.ptr];
302 } else {
303 return &sections[PHYS_SECTION_UNASSIGNED];
307 bool memory_region_is_unassigned(MemoryRegion *mr)
309 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
310 && mr != &io_mem_watch;
313 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
314 hwaddr addr,
315 bool resolve_subpage)
317 MemoryRegionSection *section;
318 subpage_t *subpage;
320 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
321 if (resolve_subpage && section->mr->subpage) {
322 subpage = container_of(section->mr, subpage_t, iomem);
323 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
325 return section;
328 static MemoryRegionSection *
329 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
330 hwaddr *plen, bool resolve_subpage)
332 MemoryRegionSection *section;
333 Int128 diff;
335 section = address_space_lookup_region(d, addr, resolve_subpage);
336 /* Compute offset within MemoryRegionSection */
337 addr -= section->offset_within_address_space;
339 /* Compute offset within MemoryRegion */
340 *xlat = addr + section->offset_within_region;
342 diff = int128_sub(section->mr->size, int128_make64(addr));
343 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
344 return section;
347 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
349 if (memory_region_is_ram(mr)) {
350 return !(is_write && mr->readonly);
352 if (memory_region_is_romd(mr)) {
353 return !is_write;
356 return false;
359 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
360 hwaddr *xlat, hwaddr *plen,
361 bool is_write)
363 IOMMUTLBEntry iotlb;
364 MemoryRegionSection *section;
365 MemoryRegion *mr;
366 hwaddr len = *plen;
368 for (;;) {
369 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
370 mr = section->mr;
372 if (!mr->iommu_ops) {
373 break;
376 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
377 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
378 | (addr & iotlb.addr_mask));
379 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
380 if (!(iotlb.perm & (1 << is_write))) {
381 mr = &io_mem_unassigned;
382 break;
385 as = iotlb.target_as;
388 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
389 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
390 len = MIN(page, len);
393 *plen = len;
394 *xlat = addr;
395 return mr;
398 MemoryRegionSection *
399 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
400 hwaddr *plen)
402 MemoryRegionSection *section;
403 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
405 assert(!section->mr->iommu_ops);
406 return section;
408 #endif
410 void cpu_exec_init_all(void)
412 #if !defined(CONFIG_USER_ONLY)
413 qemu_mutex_init(&ram_list.mutex);
414 memory_map_init();
415 io_mem_init();
416 #endif
419 #if !defined(CONFIG_USER_ONLY)
421 static int cpu_common_post_load(void *opaque, int version_id)
423 CPUState *cpu = opaque;
425 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
426 version_id is increased. */
427 cpu->interrupt_request &= ~0x01;
428 tlb_flush(cpu, 1);
430 return 0;
433 static int cpu_common_pre_load(void *opaque)
435 CPUState *cpu = opaque;
437 cpu->exception_index = 0;
439 return 0;
442 static bool cpu_common_exception_index_needed(void *opaque)
444 CPUState *cpu = opaque;
446 return cpu->exception_index != 0;
449 static const VMStateDescription vmstate_cpu_common_exception_index = {
450 .name = "cpu_common/exception_index",
451 .version_id = 1,
452 .minimum_version_id = 1,
453 .fields = (VMStateField[]) {
454 VMSTATE_INT32(exception_index, CPUState),
455 VMSTATE_END_OF_LIST()
459 const VMStateDescription vmstate_cpu_common = {
460 .name = "cpu_common",
461 .version_id = 1,
462 .minimum_version_id = 1,
463 .pre_load = cpu_common_pre_load,
464 .post_load = cpu_common_post_load,
465 .fields = (VMStateField[]) {
466 VMSTATE_UINT32(halted, CPUState),
467 VMSTATE_UINT32(interrupt_request, CPUState),
468 VMSTATE_END_OF_LIST()
470 .subsections = (VMStateSubsection[]) {
472 .vmsd = &vmstate_cpu_common_exception_index,
473 .needed = cpu_common_exception_index_needed,
474 } , {
475 /* empty */
480 #endif
482 CPUState *qemu_get_cpu(int index)
484 CPUState *cpu;
486 CPU_FOREACH(cpu) {
487 if (cpu->cpu_index == index) {
488 return cpu;
492 return NULL;
495 #if !defined(CONFIG_USER_ONLY)
496 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
498 /* We only support one address space per cpu at the moment. */
499 assert(cpu->as == as);
501 if (cpu->tcg_as_listener) {
502 memory_listener_unregister(cpu->tcg_as_listener);
503 } else {
504 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
506 cpu->tcg_as_listener->commit = tcg_commit;
507 memory_listener_register(cpu->tcg_as_listener, as);
509 #endif
511 void cpu_exec_init(CPUArchState *env)
513 CPUState *cpu = ENV_GET_CPU(env);
514 CPUClass *cc = CPU_GET_CLASS(cpu);
515 CPUState *some_cpu;
516 int cpu_index;
518 #if defined(CONFIG_USER_ONLY)
519 cpu_list_lock();
520 #endif
521 cpu_index = 0;
522 CPU_FOREACH(some_cpu) {
523 cpu_index++;
525 cpu->cpu_index = cpu_index;
526 cpu->numa_node = 0;
527 QTAILQ_INIT(&cpu->breakpoints);
528 QTAILQ_INIT(&cpu->watchpoints);
529 #ifndef CONFIG_USER_ONLY
530 cpu->as = &address_space_memory;
531 cpu->thread_id = qemu_get_thread_id();
532 #endif
533 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
534 #if defined(CONFIG_USER_ONLY)
535 cpu_list_unlock();
536 #endif
537 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
538 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
540 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
541 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
542 cpu_save, cpu_load, env);
543 assert(cc->vmsd == NULL);
544 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
545 #endif
546 if (cc->vmsd != NULL) {
547 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
551 #if defined(TARGET_HAS_ICE)
552 #if defined(CONFIG_USER_ONLY)
553 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
555 tb_invalidate_phys_page_range(pc, pc + 1, 0);
557 #else
558 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
560 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
561 if (phys != -1) {
562 tb_invalidate_phys_addr(cpu->as,
563 phys | (pc & ~TARGET_PAGE_MASK));
566 #endif
567 #endif /* TARGET_HAS_ICE */
569 #if defined(CONFIG_USER_ONLY)
570 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
575 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
576 int flags)
578 return -ENOSYS;
581 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
585 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
586 int flags, CPUWatchpoint **watchpoint)
588 return -ENOSYS;
590 #else
591 /* Add a watchpoint. */
592 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
593 int flags, CPUWatchpoint **watchpoint)
595 CPUWatchpoint *wp;
597 /* forbid ranges which are empty or run off the end of the address space */
598 if (len == 0 || (addr + len - 1) < addr) {
599 error_report("tried to set invalid watchpoint at %"
600 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
601 return -EINVAL;
603 wp = g_malloc(sizeof(*wp));
605 wp->vaddr = addr;
606 wp->len = len;
607 wp->flags = flags;
609 /* keep all GDB-injected watchpoints in front */
610 if (flags & BP_GDB) {
611 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
612 } else {
613 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
616 tlb_flush_page(cpu, addr);
618 if (watchpoint)
619 *watchpoint = wp;
620 return 0;
623 /* Remove a specific watchpoint. */
624 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
625 int flags)
627 CPUWatchpoint *wp;
629 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
630 if (addr == wp->vaddr && len == wp->len
631 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
632 cpu_watchpoint_remove_by_ref(cpu, wp);
633 return 0;
636 return -ENOENT;
639 /* Remove a specific watchpoint by reference. */
640 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
642 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
644 tlb_flush_page(cpu, watchpoint->vaddr);
646 g_free(watchpoint);
649 /* Remove all matching watchpoints. */
650 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
652 CPUWatchpoint *wp, *next;
654 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
655 if (wp->flags & mask) {
656 cpu_watchpoint_remove_by_ref(cpu, wp);
661 /* Return true if this watchpoint address matches the specified
662 * access (ie the address range covered by the watchpoint overlaps
663 * partially or completely with the address range covered by the
664 * access).
666 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
667 vaddr addr,
668 vaddr len)
670 /* We know the lengths are non-zero, but a little caution is
671 * required to avoid errors in the case where the range ends
672 * exactly at the top of the address space and so addr + len
673 * wraps round to zero.
675 vaddr wpend = wp->vaddr + wp->len - 1;
676 vaddr addrend = addr + len - 1;
678 return !(addr > wpend || wp->vaddr > addrend);
681 #endif
683 /* Add a breakpoint. */
684 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
685 CPUBreakpoint **breakpoint)
687 #if defined(TARGET_HAS_ICE)
688 CPUBreakpoint *bp;
690 bp = g_malloc(sizeof(*bp));
692 bp->pc = pc;
693 bp->flags = flags;
695 /* keep all GDB-injected breakpoints in front */
696 if (flags & BP_GDB) {
697 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
698 } else {
699 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
702 breakpoint_invalidate(cpu, pc);
704 if (breakpoint) {
705 *breakpoint = bp;
707 return 0;
708 #else
709 return -ENOSYS;
710 #endif
713 /* Remove a specific breakpoint. */
714 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
716 #if defined(TARGET_HAS_ICE)
717 CPUBreakpoint *bp;
719 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
720 if (bp->pc == pc && bp->flags == flags) {
721 cpu_breakpoint_remove_by_ref(cpu, bp);
722 return 0;
725 return -ENOENT;
726 #else
727 return -ENOSYS;
728 #endif
731 /* Remove a specific breakpoint by reference. */
732 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
734 #if defined(TARGET_HAS_ICE)
735 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
737 breakpoint_invalidate(cpu, breakpoint->pc);
739 g_free(breakpoint);
740 #endif
743 /* Remove all matching breakpoints. */
744 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
746 #if defined(TARGET_HAS_ICE)
747 CPUBreakpoint *bp, *next;
749 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
750 if (bp->flags & mask) {
751 cpu_breakpoint_remove_by_ref(cpu, bp);
754 #endif
757 /* enable or disable single step mode. EXCP_DEBUG is returned by the
758 CPU loop after each instruction */
759 void cpu_single_step(CPUState *cpu, int enabled)
761 #if defined(TARGET_HAS_ICE)
762 if (cpu->singlestep_enabled != enabled) {
763 cpu->singlestep_enabled = enabled;
764 if (kvm_enabled()) {
765 kvm_update_guest_debug(cpu, 0);
766 } else {
767 /* must flush all the translated code to avoid inconsistencies */
768 /* XXX: only flush what is necessary */
769 CPUArchState *env = cpu->env_ptr;
770 tb_flush(env);
773 #endif
776 void cpu_abort(CPUState *cpu, const char *fmt, ...)
778 va_list ap;
779 va_list ap2;
781 va_start(ap, fmt);
782 va_copy(ap2, ap);
783 fprintf(stderr, "qemu: fatal: ");
784 vfprintf(stderr, fmt, ap);
785 fprintf(stderr, "\n");
786 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
787 if (qemu_log_enabled()) {
788 qemu_log("qemu: fatal: ");
789 qemu_log_vprintf(fmt, ap2);
790 qemu_log("\n");
791 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
792 qemu_log_flush();
793 qemu_log_close();
795 va_end(ap2);
796 va_end(ap);
797 #if defined(CONFIG_USER_ONLY)
799 struct sigaction act;
800 sigfillset(&act.sa_mask);
801 act.sa_handler = SIG_DFL;
802 sigaction(SIGABRT, &act, NULL);
804 #endif
805 abort();
808 #if !defined(CONFIG_USER_ONLY)
809 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
811 RAMBlock *block;
813 /* The list is protected by the iothread lock here. */
814 block = ram_list.mru_block;
815 if (block && addr - block->offset < block->length) {
816 goto found;
818 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
819 if (addr - block->offset < block->length) {
820 goto found;
824 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
825 abort();
827 found:
828 ram_list.mru_block = block;
829 return block;
832 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
834 ram_addr_t start1;
835 RAMBlock *block;
836 ram_addr_t end;
838 end = TARGET_PAGE_ALIGN(start + length);
839 start &= TARGET_PAGE_MASK;
841 block = qemu_get_ram_block(start);
842 assert(block == qemu_get_ram_block(end - 1));
843 start1 = (uintptr_t)block->host + (start - block->offset);
844 cpu_tlb_reset_dirty_all(start1, length);
847 /* Note: start and end must be within the same ram block. */
848 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
849 unsigned client)
851 if (length == 0)
852 return;
853 cpu_physical_memory_clear_dirty_range(start, length, client);
855 if (tcg_enabled()) {
856 tlb_reset_dirty_range_all(start, length);
860 static void cpu_physical_memory_set_dirty_tracking(bool enable)
862 in_migration = enable;
865 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
866 MemoryRegionSection *section,
867 target_ulong vaddr,
868 hwaddr paddr, hwaddr xlat,
869 int prot,
870 target_ulong *address)
872 hwaddr iotlb;
873 CPUWatchpoint *wp;
875 if (memory_region_is_ram(section->mr)) {
876 /* Normal RAM. */
877 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
878 + xlat;
879 if (!section->readonly) {
880 iotlb |= PHYS_SECTION_NOTDIRTY;
881 } else {
882 iotlb |= PHYS_SECTION_ROM;
884 } else {
885 iotlb = section - section->address_space->dispatch->map.sections;
886 iotlb += xlat;
889 /* Make accesses to pages with watchpoints go via the
890 watchpoint trap routines. */
891 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
892 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
893 /* Avoid trapping reads of pages with a write breakpoint. */
894 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
895 iotlb = PHYS_SECTION_WATCH + paddr;
896 *address |= TLB_MMIO;
897 break;
902 return iotlb;
904 #endif /* !defined(CONFIG_USER_ONLY) */
906 #if !defined(CONFIG_USER_ONLY)
908 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
909 uint16_t section);
910 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
912 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
915 * Set a custom physical guest memory allocator.
916 * Accelerators with unusual needs may need this. Hopefully, we can
917 * get rid of it eventually.
919 void phys_mem_set_alloc(void *(*alloc)(size_t))
921 phys_mem_alloc = alloc;
924 static uint16_t phys_section_add(PhysPageMap *map,
925 MemoryRegionSection *section)
927 /* The physical section number is ORed with a page-aligned
928 * pointer to produce the iotlb entries. Thus it should
929 * never overflow into the page-aligned value.
931 assert(map->sections_nb < TARGET_PAGE_SIZE);
933 if (map->sections_nb == map->sections_nb_alloc) {
934 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
935 map->sections = g_renew(MemoryRegionSection, map->sections,
936 map->sections_nb_alloc);
938 map->sections[map->sections_nb] = *section;
939 memory_region_ref(section->mr);
940 return map->sections_nb++;
943 static void phys_section_destroy(MemoryRegion *mr)
945 memory_region_unref(mr);
947 if (mr->subpage) {
948 subpage_t *subpage = container_of(mr, subpage_t, iomem);
949 object_unref(OBJECT(&subpage->iomem));
950 g_free(subpage);
954 static void phys_sections_free(PhysPageMap *map)
956 while (map->sections_nb > 0) {
957 MemoryRegionSection *section = &map->sections[--map->sections_nb];
958 phys_section_destroy(section->mr);
960 g_free(map->sections);
961 g_free(map->nodes);
964 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
966 subpage_t *subpage;
967 hwaddr base = section->offset_within_address_space
968 & TARGET_PAGE_MASK;
969 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
970 d->map.nodes, d->map.sections);
971 MemoryRegionSection subsection = {
972 .offset_within_address_space = base,
973 .size = int128_make64(TARGET_PAGE_SIZE),
975 hwaddr start, end;
977 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
979 if (!(existing->mr->subpage)) {
980 subpage = subpage_init(d->as, base);
981 subsection.address_space = d->as;
982 subsection.mr = &subpage->iomem;
983 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
984 phys_section_add(&d->map, &subsection));
985 } else {
986 subpage = container_of(existing->mr, subpage_t, iomem);
988 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
989 end = start + int128_get64(section->size) - 1;
990 subpage_register(subpage, start, end,
991 phys_section_add(&d->map, section));
995 static void register_multipage(AddressSpaceDispatch *d,
996 MemoryRegionSection *section)
998 hwaddr start_addr = section->offset_within_address_space;
999 uint16_t section_index = phys_section_add(&d->map, section);
1000 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1001 TARGET_PAGE_BITS));
1003 assert(num_pages);
1004 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1007 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1009 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1010 AddressSpaceDispatch *d = as->next_dispatch;
1011 MemoryRegionSection now = *section, remain = *section;
1012 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1014 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1015 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1016 - now.offset_within_address_space;
1018 now.size = int128_min(int128_make64(left), now.size);
1019 register_subpage(d, &now);
1020 } else {
1021 now.size = int128_zero();
1023 while (int128_ne(remain.size, now.size)) {
1024 remain.size = int128_sub(remain.size, now.size);
1025 remain.offset_within_address_space += int128_get64(now.size);
1026 remain.offset_within_region += int128_get64(now.size);
1027 now = remain;
1028 if (int128_lt(remain.size, page_size)) {
1029 register_subpage(d, &now);
1030 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1031 now.size = page_size;
1032 register_subpage(d, &now);
1033 } else {
1034 now.size = int128_and(now.size, int128_neg(page_size));
1035 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; only KVM has one. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1046 void qemu_mutex_lock_ramlist(void)
1048 qemu_mutex_lock(&ram_list.mutex);
1051 void qemu_mutex_unlock_ramlist(void)
1053 qemu_mutex_unlock(&ram_list.mutex);
1056 #ifdef __linux__
1058 #include <sys/vfs.h>
1060 #define HUGETLBFS_MAGIC 0x958458f6
1062 static long gethugepagesize(const char *path, Error **errp)
1064 struct statfs fs;
1065 int ret;
1067 do {
1068 ret = statfs(path, &fs);
1069 } while (ret != 0 && errno == EINTR);
1071 if (ret != 0) {
1072 error_setg_errno(errp, errno, "failed to get page size of file %s",
1073 path);
1074 return 0;
1077 if (fs.f_type != HUGETLBFS_MAGIC)
1078 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1080 return fs.f_bsize;
1083 static void *file_ram_alloc(RAMBlock *block,
1084 ram_addr_t memory,
1085 const char *path,
1086 Error **errp)
1088 char *filename;
1089 char *sanitized_name;
1090 char *c;
1091 void *area = NULL;
1092 int fd;
1093 uint64_t hpagesize;
1094 Error *local_err = NULL;
1096 hpagesize = gethugepagesize(path, &local_err);
1097 if (local_err) {
1098 error_propagate(errp, local_err);
1099 goto error;
1102 if (memory < hpagesize) {
1103 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1104 "or larger than huge page size 0x%" PRIx64,
1105 memory, hpagesize);
1106 goto error;
1109 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1110 error_setg(errp,
1111 "host lacks kvm mmu notifiers, -mem-path unsupported");
1112 goto error;
1115 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1116 sanitized_name = g_strdup(memory_region_name(block->mr));
1117 for (c = sanitized_name; *c != '\0'; c++) {
1118 if (*c == '/')
1119 *c = '_';
1122 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1123 sanitized_name);
1124 g_free(sanitized_name);
1126 fd = mkstemp(filename);
1127 if (fd < 0) {
1128 error_setg_errno(errp, errno,
1129 "unable to create backing store for hugepages");
1130 g_free(filename);
1131 goto error;
1133 unlink(filename);
1134 g_free(filename);
1136 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1139 * ftruncate is not supported by hugetlbfs in older
1140 * hosts, so don't bother bailing out on errors.
1141 * If anything goes wrong with it under other filesystems,
1142 * mmap will fail.
1144 if (ftruncate(fd, memory)) {
1145 perror("ftruncate");
1148 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1149 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1150 fd, 0);
1151 if (area == MAP_FAILED) {
1152 error_setg_errno(errp, errno,
1153 "unable to map backing store for hugepages");
1154 close(fd);
1155 goto error;
1158 if (mem_prealloc) {
1159 os_mem_prealloc(fd, area, memory);
1162 block->fd = fd;
1163 return area;
1165 error:
1166 if (mem_prealloc) {
1167 error_report("%s\n", error_get_pretty(*errp));
1168 exit(1);
1170 return NULL;
1172 #endif
1174 static ram_addr_t find_ram_offset(ram_addr_t size)
1176 RAMBlock *block, *next_block;
1177 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1179 assert(size != 0); /* it would hand out same offset multiple times */
1181 if (QTAILQ_EMPTY(&ram_list.blocks))
1182 return 0;
1184 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1185 ram_addr_t end, next = RAM_ADDR_MAX;
1187 end = block->offset + block->length;
1189 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1190 if (next_block->offset >= end) {
1191 next = MIN(next, next_block->offset);
1194 if (next - end >= size && next - end < mingap) {
1195 offset = end;
1196 mingap = next - end;
1200 if (offset == RAM_ADDR_MAX) {
1201 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1202 (uint64_t)size);
1203 abort();
1206 return offset;
1209 ram_addr_t last_ram_offset(void)
1211 RAMBlock *block;
1212 ram_addr_t last = 0;
1214 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1215 last = MAX(last, block->offset + block->length);
1217 return last;
1220 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1222 int ret;
1224 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1225 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1226 "dump-guest-core", true)) {
1227 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1228 if (ret) {
1229 perror("qemu_madvise");
1230 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1231 "but dump_guest_core=off specified\n");
1236 static RAMBlock *find_ram_block(ram_addr_t addr)
1238 RAMBlock *block;
1240 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1241 if (block->offset == addr) {
1242 return block;
1246 return NULL;
1249 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1251 RAMBlock *new_block = find_ram_block(addr);
1252 RAMBlock *block;
1254 assert(new_block);
1255 assert(!new_block->idstr[0]);
1257 if (dev) {
1258 char *id = qdev_get_dev_path(dev);
1259 if (id) {
1260 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1261 g_free(id);
1264 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1266 /* This assumes the iothread lock is taken here too. */
1267 qemu_mutex_lock_ramlist();
1268 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1269 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1270 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1271 new_block->idstr);
1272 abort();
1275 qemu_mutex_unlock_ramlist();
1278 void qemu_ram_unset_idstr(ram_addr_t addr)
1280 RAMBlock *block = find_ram_block(addr);
1282 if (block) {
1283 memset(block->idstr, 0, sizeof(block->idstr));
1287 static int memory_try_enable_merging(void *addr, size_t len)
1289 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1290 /* disabled by the user */
1291 return 0;
1294 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1297 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1299 RAMBlock *block;
1300 ram_addr_t old_ram_size, new_ram_size;
1302 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1304 /* This assumes the iothread lock is taken here too. */
1305 qemu_mutex_lock_ramlist();
1306 new_block->offset = find_ram_offset(new_block->length);
1308 if (!new_block->host) {
1309 if (xen_enabled()) {
1310 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1311 } else {
1312 new_block->host = phys_mem_alloc(new_block->length);
1313 if (!new_block->host) {
1314 error_setg_errno(errp, errno,
1315 "cannot set up guest memory '%s'",
1316 memory_region_name(new_block->mr));
1317 qemu_mutex_unlock_ramlist();
1318 return -1;
1320 memory_try_enable_merging(new_block->host, new_block->length);
1324 /* Keep the list sorted from biggest to smallest block. */
1325 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1326 if (block->length < new_block->length) {
1327 break;
1330 if (block) {
1331 QTAILQ_INSERT_BEFORE(block, new_block, next);
1332 } else {
1333 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1335 ram_list.mru_block = NULL;
1337 ram_list.version++;
1338 qemu_mutex_unlock_ramlist();
1340 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1342 if (new_ram_size > old_ram_size) {
1343 int i;
1344 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1345 ram_list.dirty_memory[i] =
1346 bitmap_zero_extend(ram_list.dirty_memory[i],
1347 old_ram_size, new_ram_size);
1350 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1352 qemu_ram_setup_dump(new_block->host, new_block->length);
1353 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1354 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1356 if (kvm_enabled()) {
1357 kvm_setup_guest_memory(new_block->host, new_block->length);
1360 return new_block->offset;
1363 #ifdef __linux__
1364 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1365 bool share, const char *mem_path,
1366 Error **errp)
1368 RAMBlock *new_block;
1369 ram_addr_t addr;
1370 Error *local_err = NULL;
1372 if (xen_enabled()) {
1373 error_setg(errp, "-mem-path not supported with Xen");
1374 return -1;
1377 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1379 * file_ram_alloc() needs to allocate just like
1380 * phys_mem_alloc, but we haven't bothered to provide
1381 * a hook there.
1383 error_setg(errp,
1384 "-mem-path not supported with this accelerator");
1385 return -1;
1388 size = TARGET_PAGE_ALIGN(size);
1389 new_block = g_malloc0(sizeof(*new_block));
1390 new_block->mr = mr;
1391 new_block->length = size;
1392 new_block->flags = share ? RAM_SHARED : 0;
1393 new_block->host = file_ram_alloc(new_block, size,
1394 mem_path, errp);
1395 if (!new_block->host) {
1396 g_free(new_block);
1397 return -1;
1400 addr = ram_block_add(new_block, &local_err);
1401 if (local_err) {
1402 g_free(new_block);
1403 error_propagate(errp, local_err);
1404 return -1;
1406 return addr;
1408 #endif
1410 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1411 MemoryRegion *mr, Error **errp)
1413 RAMBlock *new_block;
1414 ram_addr_t addr;
1415 Error *local_err = NULL;
1417 size = TARGET_PAGE_ALIGN(size);
1418 new_block = g_malloc0(sizeof(*new_block));
1419 new_block->mr = mr;
1420 new_block->length = size;
1421 new_block->fd = -1;
1422 new_block->host = host;
1423 if (host) {
1424 new_block->flags |= RAM_PREALLOC;
1426 addr = ram_block_add(new_block, &local_err);
1427 if (local_err) {
1428 g_free(new_block);
1429 error_propagate(errp, local_err);
1430 return -1;
1432 return addr;
1435 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1437 return qemu_ram_alloc_from_ptr(size, NULL, mr, errp);
1440 void qemu_ram_free_from_ptr(ram_addr_t addr)
1442 RAMBlock *block;
1444 /* This assumes the iothread lock is taken here too. */
1445 qemu_mutex_lock_ramlist();
1446 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1447 if (addr == block->offset) {
1448 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1449 ram_list.mru_block = NULL;
1450 ram_list.version++;
1451 g_free(block);
1452 break;
1455 qemu_mutex_unlock_ramlist();
1458 void qemu_ram_free(ram_addr_t addr)
1460 RAMBlock *block;
1462 /* This assumes the iothread lock is taken here too. */
1463 qemu_mutex_lock_ramlist();
1464 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1465 if (addr == block->offset) {
1466 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1467 ram_list.mru_block = NULL;
1468 ram_list.version++;
1469 if (block->flags & RAM_PREALLOC) {
1471 } else if (xen_enabled()) {
1472 xen_invalidate_map_cache_entry(block->host);
1473 #ifndef _WIN32
1474 } else if (block->fd >= 0) {
1475 munmap(block->host, block->length);
1476 close(block->fd);
1477 #endif
1478 } else {
1479 qemu_anon_ram_free(block->host, block->length);
1481 g_free(block);
1482 break;
1485 qemu_mutex_unlock_ramlist();
1489 #ifndef _WIN32
1490 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1492 RAMBlock *block;
1493 ram_addr_t offset;
1494 int flags;
1495 void *area, *vaddr;
1497 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1498 offset = addr - block->offset;
1499 if (offset < block->length) {
1500 vaddr = block->host + offset;
1501 if (block->flags & RAM_PREALLOC) {
1503 } else if (xen_enabled()) {
1504 abort();
1505 } else {
1506 flags = MAP_FIXED;
1507 munmap(vaddr, length);
1508 if (block->fd >= 0) {
1509 flags |= (block->flags & RAM_SHARED ?
1510 MAP_SHARED : MAP_PRIVATE);
1511 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1512 flags, block->fd, offset);
1513 } else {
1515 * Remap needs to match alloc. Accelerators that
1516 * set phys_mem_alloc never remap. If they did,
1517 * we'd need a remap hook here.
1519 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1521 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1522 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1523 flags, -1, 0);
1525 if (area != vaddr) {
1526 fprintf(stderr, "Could not remap addr: "
1527 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1528 length, addr);
1529 exit(1);
1531 memory_try_enable_merging(vaddr, length);
1532 qemu_ram_setup_dump(vaddr, length);
1534 return;
1538 #endif /* !_WIN32 */
1540 int qemu_get_ram_fd(ram_addr_t addr)
1542 RAMBlock *block = qemu_get_ram_block(addr);
1544 return block->fd;
1547 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1549 RAMBlock *block = qemu_get_ram_block(addr);
1551 return block->host;
1554 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1555 With the exception of the softmmu code in this file, this should
1556 only be used for local memory (e.g. video ram) that the device owns,
1557 and knows it isn't going to access beyond the end of the block.
1559 It should not be used for general purpose DMA.
1560 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1562 void *qemu_get_ram_ptr(ram_addr_t addr)
1564 RAMBlock *block = qemu_get_ram_block(addr);
1566 if (xen_enabled()) {
1567 /* We need to check if the requested address is in the RAM
1568 * because we don't want to map the entire memory in QEMU.
1569 * In that case just map until the end of the page.
1571 if (block->offset == 0) {
1572 return xen_map_cache(addr, 0, 0);
1573 } else if (block->host == NULL) {
1574 block->host =
1575 xen_map_cache(block->offset, block->length, 1);
1578 return block->host + (addr - block->offset);
1581 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1582 * but takes a size argument */
1583 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1585 if (*size == 0) {
1586 return NULL;
1588 if (xen_enabled()) {
1589 return xen_map_cache(addr, *size, 1);
1590 } else {
1591 RAMBlock *block;
1593 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1594 if (addr - block->offset < block->length) {
1595 if (addr - block->offset + *size > block->length)
1596 *size = block->length - addr + block->offset;
1597 return block->host + (addr - block->offset);
1601 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1602 abort();
1606 /* Some of the softmmu routines need to translate from a host pointer
1607 (typically a TLB entry) back to a ram offset. */
1608 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1610 RAMBlock *block;
1611 uint8_t *host = ptr;
1613 if (xen_enabled()) {
1614 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1615 return qemu_get_ram_block(*ram_addr)->mr;
1618 block = ram_list.mru_block;
1619 if (block && block->host && host - block->host < block->length) {
1620 goto found;
1623 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1624 /* This case append when the block is not mapped. */
1625 if (block->host == NULL) {
1626 continue;
1628 if (host - block->host < block->length) {
1629 goto found;
1633 return NULL;
1635 found:
1636 *ram_addr = block->offset + (host - block->host);
1637 return block->mr;
1640 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1641 uint64_t val, unsigned size)
1643 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1644 tb_invalidate_phys_page_fast(ram_addr, size);
1646 switch (size) {
1647 case 1:
1648 stb_p(qemu_get_ram_ptr(ram_addr), val);
1649 break;
1650 case 2:
1651 stw_p(qemu_get_ram_ptr(ram_addr), val);
1652 break;
1653 case 4:
1654 stl_p(qemu_get_ram_ptr(ram_addr), val);
1655 break;
1656 default:
1657 abort();
1659 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1660 /* we remove the notdirty callback only if the code has been
1661 flushed */
1662 if (!cpu_physical_memory_is_clean(ram_addr)) {
1663 CPUArchState *env = current_cpu->env_ptr;
1664 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1668 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1669 unsigned size, bool is_write)
1671 return is_write;
1674 static const MemoryRegionOps notdirty_mem_ops = {
1675 .write = notdirty_mem_write,
1676 .valid.accepts = notdirty_mem_accepts,
1677 .endianness = DEVICE_NATIVE_ENDIAN,
1680 /* Generate a debug exception if a watchpoint has been hit. */
1681 static void check_watchpoint(int offset, int len, int flags)
1683 CPUState *cpu = current_cpu;
1684 CPUArchState *env = cpu->env_ptr;
1685 target_ulong pc, cs_base;
1686 target_ulong vaddr;
1687 CPUWatchpoint *wp;
1688 int cpu_flags;
1690 if (cpu->watchpoint_hit) {
1691 /* We re-entered the check after replacing the TB. Now raise
1692 * the debug interrupt so that is will trigger after the
1693 * current instruction. */
1694 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1695 return;
1697 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1698 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1699 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1700 && (wp->flags & flags)) {
1701 if (flags == BP_MEM_READ) {
1702 wp->flags |= BP_WATCHPOINT_HIT_READ;
1703 } else {
1704 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1706 wp->hitaddr = vaddr;
1707 if (!cpu->watchpoint_hit) {
1708 cpu->watchpoint_hit = wp;
1709 tb_check_watchpoint(cpu);
1710 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1711 cpu->exception_index = EXCP_DEBUG;
1712 cpu_loop_exit(cpu);
1713 } else {
1714 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1715 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1716 cpu_resume_from_signal(cpu, NULL);
1719 } else {
1720 wp->flags &= ~BP_WATCHPOINT_HIT;
1725 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1726 so these check for a hit then pass through to the normal out-of-line
1727 phys routines. */
1728 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1729 unsigned size)
1731 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1732 switch (size) {
1733 case 1: return ldub_phys(&address_space_memory, addr);
1734 case 2: return lduw_phys(&address_space_memory, addr);
1735 case 4: return ldl_phys(&address_space_memory, addr);
1736 default: abort();
1740 static void watch_mem_write(void *opaque, hwaddr addr,
1741 uint64_t val, unsigned size)
1743 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1744 switch (size) {
1745 case 1:
1746 stb_phys(&address_space_memory, addr, val);
1747 break;
1748 case 2:
1749 stw_phys(&address_space_memory, addr, val);
1750 break;
1751 case 4:
1752 stl_phys(&address_space_memory, addr, val);
1753 break;
1754 default: abort();
1758 static const MemoryRegionOps watch_mem_ops = {
1759 .read = watch_mem_read,
1760 .write = watch_mem_write,
1761 .endianness = DEVICE_NATIVE_ENDIAN,
1764 static uint64_t subpage_read(void *opaque, hwaddr addr,
1765 unsigned len)
1767 subpage_t *subpage = opaque;
1768 uint8_t buf[4];
1770 #if defined(DEBUG_SUBPAGE)
1771 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1772 subpage, len, addr);
1773 #endif
1774 address_space_read(subpage->as, addr + subpage->base, buf, len);
1775 switch (len) {
1776 case 1:
1777 return ldub_p(buf);
1778 case 2:
1779 return lduw_p(buf);
1780 case 4:
1781 return ldl_p(buf);
1782 default:
1783 abort();
1787 static void subpage_write(void *opaque, hwaddr addr,
1788 uint64_t value, unsigned len)
1790 subpage_t *subpage = opaque;
1791 uint8_t buf[4];
1793 #if defined(DEBUG_SUBPAGE)
1794 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1795 " value %"PRIx64"\n",
1796 __func__, subpage, len, addr, value);
1797 #endif
1798 switch (len) {
1799 case 1:
1800 stb_p(buf, value);
1801 break;
1802 case 2:
1803 stw_p(buf, value);
1804 break;
1805 case 4:
1806 stl_p(buf, value);
1807 break;
1808 default:
1809 abort();
1811 address_space_write(subpage->as, addr + subpage->base, buf, len);
1814 static bool subpage_accepts(void *opaque, hwaddr addr,
1815 unsigned len, bool is_write)
1817 subpage_t *subpage = opaque;
1818 #if defined(DEBUG_SUBPAGE)
1819 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1820 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1821 #endif
1823 return address_space_access_valid(subpage->as, addr + subpage->base,
1824 len, is_write);
1827 static const MemoryRegionOps subpage_ops = {
1828 .read = subpage_read,
1829 .write = subpage_write,
1830 .valid.accepts = subpage_accepts,
1831 .endianness = DEVICE_NATIVE_ENDIAN,
1834 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1835 uint16_t section)
1837 int idx, eidx;
1839 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1840 return -1;
1841 idx = SUBPAGE_IDX(start);
1842 eidx = SUBPAGE_IDX(end);
1843 #if defined(DEBUG_SUBPAGE)
1844 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1845 __func__, mmio, start, end, idx, eidx, section);
1846 #endif
1847 for (; idx <= eidx; idx++) {
1848 mmio->sub_section[idx] = section;
1851 return 0;
1854 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1856 subpage_t *mmio;
1858 mmio = g_malloc0(sizeof(subpage_t));
1860 mmio->as = as;
1861 mmio->base = base;
1862 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1863 NULL, TARGET_PAGE_SIZE);
1864 mmio->iomem.subpage = true;
1865 #if defined(DEBUG_SUBPAGE)
1866 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1867 mmio, base, TARGET_PAGE_SIZE);
1868 #endif
1869 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1871 return mmio;
1874 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1875 MemoryRegion *mr)
1877 assert(as);
1878 MemoryRegionSection section = {
1879 .address_space = as,
1880 .mr = mr,
1881 .offset_within_address_space = 0,
1882 .offset_within_region = 0,
1883 .size = int128_2_64(),
1886 return phys_section_add(map, &section);
1889 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1891 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1894 static void io_mem_init(void)
1896 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1897 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1898 NULL, UINT64_MAX);
1899 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1900 NULL, UINT64_MAX);
1901 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1902 NULL, UINT64_MAX);
1905 static void mem_begin(MemoryListener *listener)
1907 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1908 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1909 uint16_t n;
1911 n = dummy_section(&d->map, as, &io_mem_unassigned);
1912 assert(n == PHYS_SECTION_UNASSIGNED);
1913 n = dummy_section(&d->map, as, &io_mem_notdirty);
1914 assert(n == PHYS_SECTION_NOTDIRTY);
1915 n = dummy_section(&d->map, as, &io_mem_rom);
1916 assert(n == PHYS_SECTION_ROM);
1917 n = dummy_section(&d->map, as, &io_mem_watch);
1918 assert(n == PHYS_SECTION_WATCH);
1920 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1921 d->as = as;
1922 as->next_dispatch = d;
1925 static void mem_commit(MemoryListener *listener)
1927 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1928 AddressSpaceDispatch *cur = as->dispatch;
1929 AddressSpaceDispatch *next = as->next_dispatch;
1931 phys_page_compact_all(next, next->map.nodes_nb);
1933 as->dispatch = next;
1935 if (cur) {
1936 phys_sections_free(&cur->map);
1937 g_free(cur);
1941 static void tcg_commit(MemoryListener *listener)
1943 CPUState *cpu;
1945 /* since each CPU stores ram addresses in its TLB cache, we must
1946 reset the modified entries */
1947 /* XXX: slow ! */
1948 CPU_FOREACH(cpu) {
1949 /* FIXME: Disentangle the cpu.h circular files deps so we can
1950 directly get the right CPU from listener. */
1951 if (cpu->tcg_as_listener != listener) {
1952 continue;
1954 tlb_flush(cpu, 1);
1958 static void core_log_global_start(MemoryListener *listener)
1960 cpu_physical_memory_set_dirty_tracking(true);
1963 static void core_log_global_stop(MemoryListener *listener)
1965 cpu_physical_memory_set_dirty_tracking(false);
1968 static MemoryListener core_memory_listener = {
1969 .log_global_start = core_log_global_start,
1970 .log_global_stop = core_log_global_stop,
1971 .priority = 1,
1974 void address_space_init_dispatch(AddressSpace *as)
1976 as->dispatch = NULL;
1977 as->dispatch_listener = (MemoryListener) {
1978 .begin = mem_begin,
1979 .commit = mem_commit,
1980 .region_add = mem_add,
1981 .region_nop = mem_add,
1982 .priority = 0,
1984 memory_listener_register(&as->dispatch_listener, as);
1987 void address_space_destroy_dispatch(AddressSpace *as)
1989 AddressSpaceDispatch *d = as->dispatch;
1991 memory_listener_unregister(&as->dispatch_listener);
1992 g_free(d);
1993 as->dispatch = NULL;
1996 static void memory_map_init(void)
1998 system_memory = g_malloc(sizeof(*system_memory));
2000 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2001 address_space_init(&address_space_memory, system_memory, "memory");
2003 system_io = g_malloc(sizeof(*system_io));
2004 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2005 65536);
2006 address_space_init(&address_space_io, system_io, "I/O");
2008 memory_listener_register(&core_memory_listener, &address_space_memory);
2011 MemoryRegion *get_system_memory(void)
2013 return system_memory;
2016 MemoryRegion *get_system_io(void)
2018 return system_io;
2021 #endif /* !defined(CONFIG_USER_ONLY) */
2023 /* physical memory access (slow version, mainly for debug) */
2024 #if defined(CONFIG_USER_ONLY)
2025 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2026 uint8_t *buf, int len, int is_write)
2028 int l, flags;
2029 target_ulong page;
2030 void * p;
2032 while (len > 0) {
2033 page = addr & TARGET_PAGE_MASK;
2034 l = (page + TARGET_PAGE_SIZE) - addr;
2035 if (l > len)
2036 l = len;
2037 flags = page_get_flags(page);
2038 if (!(flags & PAGE_VALID))
2039 return -1;
2040 if (is_write) {
2041 if (!(flags & PAGE_WRITE))
2042 return -1;
2043 /* XXX: this code should not depend on lock_user */
2044 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2045 return -1;
2046 memcpy(p, buf, l);
2047 unlock_user(p, addr, l);
2048 } else {
2049 if (!(flags & PAGE_READ))
2050 return -1;
2051 /* XXX: this code should not depend on lock_user */
2052 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2053 return -1;
2054 memcpy(buf, p, l);
2055 unlock_user(p, addr, 0);
2057 len -= l;
2058 buf += l;
2059 addr += l;
2061 return 0;
2064 #else
2066 static void invalidate_and_set_dirty(hwaddr addr,
2067 hwaddr length)
2069 if (cpu_physical_memory_is_clean(addr)) {
2070 /* invalidate code */
2071 tb_invalidate_phys_page_range(addr, addr + length, 0);
2072 /* set dirty bit */
2073 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2075 xen_modified_memory(addr, length);
2078 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2080 unsigned access_size_max = mr->ops->valid.max_access_size;
2082 /* Regions are assumed to support 1-4 byte accesses unless
2083 otherwise specified. */
2084 if (access_size_max == 0) {
2085 access_size_max = 4;
2088 /* Bound the maximum access by the alignment of the address. */
2089 if (!mr->ops->impl.unaligned) {
2090 unsigned align_size_max = addr & -addr;
2091 if (align_size_max != 0 && align_size_max < access_size_max) {
2092 access_size_max = align_size_max;
2096 /* Don't attempt accesses larger than the maximum. */
2097 if (l > access_size_max) {
2098 l = access_size_max;
2100 if (l & (l - 1)) {
2101 l = 1 << (qemu_fls(l) - 1);
2104 return l;
2107 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2108 int len, bool is_write)
2110 hwaddr l;
2111 uint8_t *ptr;
2112 uint64_t val;
2113 hwaddr addr1;
2114 MemoryRegion *mr;
2115 bool error = false;
2117 while (len > 0) {
2118 l = len;
2119 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2121 if (is_write) {
2122 if (!memory_access_is_direct(mr, is_write)) {
2123 l = memory_access_size(mr, l, addr1);
2124 /* XXX: could force current_cpu to NULL to avoid
2125 potential bugs */
2126 switch (l) {
2127 case 8:
2128 /* 64 bit write access */
2129 val = ldq_p(buf);
2130 error |= io_mem_write(mr, addr1, val, 8);
2131 break;
2132 case 4:
2133 /* 32 bit write access */
2134 val = ldl_p(buf);
2135 error |= io_mem_write(mr, addr1, val, 4);
2136 break;
2137 case 2:
2138 /* 16 bit write access */
2139 val = lduw_p(buf);
2140 error |= io_mem_write(mr, addr1, val, 2);
2141 break;
2142 case 1:
2143 /* 8 bit write access */
2144 val = ldub_p(buf);
2145 error |= io_mem_write(mr, addr1, val, 1);
2146 break;
2147 default:
2148 abort();
2150 } else {
2151 addr1 += memory_region_get_ram_addr(mr);
2152 /* RAM case */
2153 ptr = qemu_get_ram_ptr(addr1);
2154 memcpy(ptr, buf, l);
2155 invalidate_and_set_dirty(addr1, l);
2157 } else {
2158 if (!memory_access_is_direct(mr, is_write)) {
2159 /* I/O case */
2160 l = memory_access_size(mr, l, addr1);
2161 switch (l) {
2162 case 8:
2163 /* 64 bit read access */
2164 error |= io_mem_read(mr, addr1, &val, 8);
2165 stq_p(buf, val);
2166 break;
2167 case 4:
2168 /* 32 bit read access */
2169 error |= io_mem_read(mr, addr1, &val, 4);
2170 stl_p(buf, val);
2171 break;
2172 case 2:
2173 /* 16 bit read access */
2174 error |= io_mem_read(mr, addr1, &val, 2);
2175 stw_p(buf, val);
2176 break;
2177 case 1:
2178 /* 8 bit read access */
2179 error |= io_mem_read(mr, addr1, &val, 1);
2180 stb_p(buf, val);
2181 break;
2182 default:
2183 abort();
2185 } else {
2186 /* RAM case */
2187 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2188 memcpy(buf, ptr, l);
2191 len -= l;
2192 buf += l;
2193 addr += l;
2196 return error;
2199 bool address_space_write(AddressSpace *as, hwaddr addr,
2200 const uint8_t *buf, int len)
2202 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2205 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2207 return address_space_rw(as, addr, buf, len, false);
2211 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2212 int len, int is_write)
2214 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host icache over the range */
};
2222 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2223 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2225 hwaddr l;
2226 uint8_t *ptr;
2227 hwaddr addr1;
2228 MemoryRegion *mr;
2230 while (len > 0) {
2231 l = len;
2232 mr = address_space_translate(as, addr, &addr1, &l, true);
2234 if (!(memory_region_is_ram(mr) ||
2235 memory_region_is_romd(mr))) {
2236 /* do nothing */
2237 } else {
2238 addr1 += memory_region_get_ram_addr(mr);
2239 /* ROM/RAM case */
2240 ptr = qemu_get_ram_ptr(addr1);
2241 switch (type) {
2242 case WRITE_DATA:
2243 memcpy(ptr, buf, l);
2244 invalidate_and_set_dirty(addr1, l);
2245 break;
2246 case FLUSH_CACHE:
2247 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2248 break;
2251 len -= l;
2252 buf += l;
2253 addr += l;
2257 /* used for ROM loading : can write in RAM and ROM */
2258 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2259 const uint8_t *buf, int len)
2261 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2264 void cpu_flush_icache_range(hwaddr start, int len)
2267 * This function should do the same thing as an icache flush that was
2268 * triggered from within the guest. For TCG we are always cache coherent,
2269 * so there is no need to flush anything. For KVM / Xen we need to flush
2270 * the host's instruction cache at least.
2272 if (tcg_enabled()) {
2273 return;
2276 cpu_physical_memory_write_rom_internal(&address_space_memory,
2277 start, NULL, len, FLUSH_CACHE);
2280 typedef struct {
2281 MemoryRegion *mr;
2282 void *buffer;
2283 hwaddr addr;
2284 hwaddr len;
2285 } BounceBuffer;
2287 static BounceBuffer bounce;
2289 typedef struct MapClient {
2290 void *opaque;
2291 void (*callback)(void *opaque);
2292 QLIST_ENTRY(MapClient) link;
2293 } MapClient;
2295 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2296 = QLIST_HEAD_INITIALIZER(map_client_list);
2298 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2300 MapClient *client = g_malloc(sizeof(*client));
2302 client->opaque = opaque;
2303 client->callback = callback;
2304 QLIST_INSERT_HEAD(&map_client_list, client, link);
2305 return client;
2308 static void cpu_unregister_map_client(void *_client)
2310 MapClient *client = (MapClient *)_client;
2312 QLIST_REMOVE(client, link);
2313 g_free(client);
2316 static void cpu_notify_map_clients(void)
2318 MapClient *client;
2320 while (!QLIST_EMPTY(&map_client_list)) {
2321 client = QLIST_FIRST(&map_client_list);
2322 client->callback(client->opaque);
2323 cpu_unregister_map_client(client);
2327 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2329 MemoryRegion *mr;
2330 hwaddr l, xlat;
2332 while (len > 0) {
2333 l = len;
2334 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2335 if (!memory_access_is_direct(mr, is_write)) {
2336 l = memory_access_size(mr, l, addr);
2337 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2338 return false;
2342 len -= l;
2343 addr += l;
2345 return true;
2348 /* Map a physical memory region into a host virtual address.
2349 * May map a subset of the requested range, given by and returned in *plen.
2350 * May return NULL if resources needed to perform the mapping are exhausted.
2351 * Use only for reads OR writes - not for read-modify-write operations.
2352 * Use cpu_register_map_client() to know when retrying the map operation is
2353 * likely to succeed.
2355 void *address_space_map(AddressSpace *as,
2356 hwaddr addr,
2357 hwaddr *plen,
2358 bool is_write)
2360 hwaddr len = *plen;
2361 hwaddr done = 0;
2362 hwaddr l, xlat, base;
2363 MemoryRegion *mr, *this_mr;
2364 ram_addr_t raddr;
2366 if (len == 0) {
2367 return NULL;
2370 l = len;
2371 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2372 if (!memory_access_is_direct(mr, is_write)) {
2373 if (bounce.buffer) {
2374 return NULL;
2376 /* Avoid unbounded allocations */
2377 l = MIN(l, TARGET_PAGE_SIZE);
2378 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2379 bounce.addr = addr;
2380 bounce.len = l;
2382 memory_region_ref(mr);
2383 bounce.mr = mr;
2384 if (!is_write) {
2385 address_space_read(as, addr, bounce.buffer, l);
2388 *plen = l;
2389 return bounce.buffer;
2392 base = xlat;
2393 raddr = memory_region_get_ram_addr(mr);
2395 for (;;) {
2396 len -= l;
2397 addr += l;
2398 done += l;
2399 if (len == 0) {
2400 break;
2403 l = len;
2404 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2405 if (this_mr != mr || xlat != base + done) {
2406 break;
2410 memory_region_ref(mr);
2411 *plen = done;
2412 return qemu_ram_ptr_length(raddr + base, plen);
2415 /* Unmaps a memory region previously mapped by address_space_map().
2416 * Will also mark the memory as dirty if is_write == 1. access_len gives
2417 * the amount of memory that was actually read or written by the caller.
2419 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2420 int is_write, hwaddr access_len)
2422 if (buffer != bounce.buffer) {
2423 MemoryRegion *mr;
2424 ram_addr_t addr1;
2426 mr = qemu_ram_addr_from_host(buffer, &addr1);
2427 assert(mr != NULL);
2428 if (is_write) {
2429 invalidate_and_set_dirty(addr1, access_len);
2431 if (xen_enabled()) {
2432 xen_invalidate_map_cache_entry(buffer);
2434 memory_region_unref(mr);
2435 return;
2437 if (is_write) {
2438 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2440 qemu_vfree(bounce.buffer);
2441 bounce.buffer = NULL;
2442 memory_region_unref(bounce.mr);
2443 cpu_notify_map_clients();
2446 void *cpu_physical_memory_map(hwaddr addr,
2447 hwaddr *plen,
2448 int is_write)
2450 return address_space_map(&address_space_memory, addr, plen, is_write);
2453 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2454 int is_write, hwaddr access_len)
2456 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2459 /* warning: addr must be aligned */
2460 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2461 enum device_endian endian)
2463 uint8_t *ptr;
2464 uint64_t val;
2465 MemoryRegion *mr;
2466 hwaddr l = 4;
2467 hwaddr addr1;
2469 mr = address_space_translate(as, addr, &addr1, &l, false);
2470 if (l < 4 || !memory_access_is_direct(mr, false)) {
2471 /* I/O case */
2472 io_mem_read(mr, addr1, &val, 4);
2473 #if defined(TARGET_WORDS_BIGENDIAN)
2474 if (endian == DEVICE_LITTLE_ENDIAN) {
2475 val = bswap32(val);
2477 #else
2478 if (endian == DEVICE_BIG_ENDIAN) {
2479 val = bswap32(val);
2481 #endif
2482 } else {
2483 /* RAM case */
2484 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2485 & TARGET_PAGE_MASK)
2486 + addr1);
2487 switch (endian) {
2488 case DEVICE_LITTLE_ENDIAN:
2489 val = ldl_le_p(ptr);
2490 break;
2491 case DEVICE_BIG_ENDIAN:
2492 val = ldl_be_p(ptr);
2493 break;
2494 default:
2495 val = ldl_p(ptr);
2496 break;
2499 return val;
2502 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2504 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2507 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2509 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2512 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2514 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2517 /* warning: addr must be aligned */
2518 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2519 enum device_endian endian)
2521 uint8_t *ptr;
2522 uint64_t val;
2523 MemoryRegion *mr;
2524 hwaddr l = 8;
2525 hwaddr addr1;
2527 mr = address_space_translate(as, addr, &addr1, &l,
2528 false);
2529 if (l < 8 || !memory_access_is_direct(mr, false)) {
2530 /* I/O case */
2531 io_mem_read(mr, addr1, &val, 8);
2532 #if defined(TARGET_WORDS_BIGENDIAN)
2533 if (endian == DEVICE_LITTLE_ENDIAN) {
2534 val = bswap64(val);
2536 #else
2537 if (endian == DEVICE_BIG_ENDIAN) {
2538 val = bswap64(val);
2540 #endif
2541 } else {
2542 /* RAM case */
2543 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2544 & TARGET_PAGE_MASK)
2545 + addr1);
2546 switch (endian) {
2547 case DEVICE_LITTLE_ENDIAN:
2548 val = ldq_le_p(ptr);
2549 break;
2550 case DEVICE_BIG_ENDIAN:
2551 val = ldq_be_p(ptr);
2552 break;
2553 default:
2554 val = ldq_p(ptr);
2555 break;
2558 return val;
2561 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2563 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2566 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2568 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2571 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2573 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2576 /* XXX: optimize */
2577 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2579 uint8_t val;
2580 address_space_rw(as, addr, &val, 1, 0);
2581 return val;
2584 /* warning: addr must be aligned */
2585 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2586 enum device_endian endian)
2588 uint8_t *ptr;
2589 uint64_t val;
2590 MemoryRegion *mr;
2591 hwaddr l = 2;
2592 hwaddr addr1;
2594 mr = address_space_translate(as, addr, &addr1, &l,
2595 false);
2596 if (l < 2 || !memory_access_is_direct(mr, false)) {
2597 /* I/O case */
2598 io_mem_read(mr, addr1, &val, 2);
2599 #if defined(TARGET_WORDS_BIGENDIAN)
2600 if (endian == DEVICE_LITTLE_ENDIAN) {
2601 val = bswap16(val);
2603 #else
2604 if (endian == DEVICE_BIG_ENDIAN) {
2605 val = bswap16(val);
2607 #endif
2608 } else {
2609 /* RAM case */
2610 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2611 & TARGET_PAGE_MASK)
2612 + addr1);
2613 switch (endian) {
2614 case DEVICE_LITTLE_ENDIAN:
2615 val = lduw_le_p(ptr);
2616 break;
2617 case DEVICE_BIG_ENDIAN:
2618 val = lduw_be_p(ptr);
2619 break;
2620 default:
2621 val = lduw_p(ptr);
2622 break;
2625 return val;
2628 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2630 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2633 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2635 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2638 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2640 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2643 /* warning: addr must be aligned. The ram page is not masked as dirty
2644 and the code inside is not invalidated. It is useful if the dirty
2645 bits are used to track modified PTEs */
2646 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2648 uint8_t *ptr;
2649 MemoryRegion *mr;
2650 hwaddr l = 4;
2651 hwaddr addr1;
2653 mr = address_space_translate(as, addr, &addr1, &l,
2654 true);
2655 if (l < 4 || !memory_access_is_direct(mr, true)) {
2656 io_mem_write(mr, addr1, val, 4);
2657 } else {
2658 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2659 ptr = qemu_get_ram_ptr(addr1);
2660 stl_p(ptr, val);
2662 if (unlikely(in_migration)) {
2663 if (cpu_physical_memory_is_clean(addr1)) {
2664 /* invalidate code */
2665 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2666 /* set dirty bit */
2667 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2673 /* warning: addr must be aligned */
2674 static inline void stl_phys_internal(AddressSpace *as,
2675 hwaddr addr, uint32_t val,
2676 enum device_endian endian)
2678 uint8_t *ptr;
2679 MemoryRegion *mr;
2680 hwaddr l = 4;
2681 hwaddr addr1;
2683 mr = address_space_translate(as, addr, &addr1, &l,
2684 true);
2685 if (l < 4 || !memory_access_is_direct(mr, true)) {
2686 #if defined(TARGET_WORDS_BIGENDIAN)
2687 if (endian == DEVICE_LITTLE_ENDIAN) {
2688 val = bswap32(val);
2690 #else
2691 if (endian == DEVICE_BIG_ENDIAN) {
2692 val = bswap32(val);
2694 #endif
2695 io_mem_write(mr, addr1, val, 4);
2696 } else {
2697 /* RAM case */
2698 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2699 ptr = qemu_get_ram_ptr(addr1);
2700 switch (endian) {
2701 case DEVICE_LITTLE_ENDIAN:
2702 stl_le_p(ptr, val);
2703 break;
2704 case DEVICE_BIG_ENDIAN:
2705 stl_be_p(ptr, val);
2706 break;
2707 default:
2708 stl_p(ptr, val);
2709 break;
2711 invalidate_and_set_dirty(addr1, 4);
2715 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2717 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2720 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2722 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2725 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2727 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2730 /* XXX: optimize */
2731 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2733 uint8_t v = val;
2734 address_space_rw(as, addr, &v, 1, 1);
2737 /* warning: addr must be aligned */
2738 static inline void stw_phys_internal(AddressSpace *as,
2739 hwaddr addr, uint32_t val,
2740 enum device_endian endian)
2742 uint8_t *ptr;
2743 MemoryRegion *mr;
2744 hwaddr l = 2;
2745 hwaddr addr1;
2747 mr = address_space_translate(as, addr, &addr1, &l, true);
2748 if (l < 2 || !memory_access_is_direct(mr, true)) {
2749 #if defined(TARGET_WORDS_BIGENDIAN)
2750 if (endian == DEVICE_LITTLE_ENDIAN) {
2751 val = bswap16(val);
2753 #else
2754 if (endian == DEVICE_BIG_ENDIAN) {
2755 val = bswap16(val);
2757 #endif
2758 io_mem_write(mr, addr1, val, 2);
2759 } else {
2760 /* RAM case */
2761 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2762 ptr = qemu_get_ram_ptr(addr1);
2763 switch (endian) {
2764 case DEVICE_LITTLE_ENDIAN:
2765 stw_le_p(ptr, val);
2766 break;
2767 case DEVICE_BIG_ENDIAN:
2768 stw_be_p(ptr, val);
2769 break;
2770 default:
2771 stw_p(ptr, val);
2772 break;
2774 invalidate_and_set_dirty(addr1, 2);
2778 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2780 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2783 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2785 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2788 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2790 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2793 /* XXX: optimize */
2794 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2796 val = tswap64(val);
2797 address_space_rw(as, addr, (void *) &val, 8, 1);
2800 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2802 val = cpu_to_le64(val);
2803 address_space_rw(as, addr, (void *) &val, 8, 1);
2806 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2808 val = cpu_to_be64(val);
2809 address_space_rw(as, addr, (void *) &val, 8, 1);
2812 /* virtual memory access for debug (includes writing to ROM) */
2813 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2814 uint8_t *buf, int len, int is_write)
2816 int l;
2817 hwaddr phys_addr;
2818 target_ulong page;
2820 while (len > 0) {
2821 page = addr & TARGET_PAGE_MASK;
2822 phys_addr = cpu_get_phys_page_debug(cpu, page);
2823 /* if no physical page mapped, return an error */
2824 if (phys_addr == -1)
2825 return -1;
2826 l = (page + TARGET_PAGE_SIZE) - addr;
2827 if (l > len)
2828 l = len;
2829 phys_addr += (addr & ~TARGET_PAGE_MASK);
2830 if (is_write) {
2831 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2832 } else {
2833 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2835 len -= l;
2836 buf += l;
2837 addr += l;
2839 return 0;
2841 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out
 * if it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
2857 #ifndef CONFIG_USER_ONLY
2858 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2860 MemoryRegion*mr;
2861 hwaddr l = 1;
2863 mr = address_space_translate(&address_space_memory,
2864 phys_addr, &phys_addr, &l, false);
2866 return !(memory_region_is_ram(mr) ||
2867 memory_region_is_romd(mr));
2870 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2872 RAMBlock *block;
2874 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2875 func(block->host, block->offset, block->length, opaque);
2878 #endif