iotests: add transactional incremental backup test
[qemu/rayw.git] / exec.c
blobb09f18b2a4d9e063929486cc700fad8075784d25
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
58 #include "qemu/range.h"
59 #ifndef _WIN32
60 #include "qemu/mmap-alloc.h"
61 #endif
63 //#define DEBUG_SUBPAGE
65 #if !defined(CONFIG_USER_ONLY)
66 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
67 * are protected by the ramlist lock.
69 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
71 static MemoryRegion *system_memory;
72 static MemoryRegion *system_io;
74 AddressSpace address_space_io;
75 AddressSpace address_space_memory;
77 MemoryRegion io_mem_rom, io_mem_notdirty;
78 static MemoryRegion io_mem_unassigned;
/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

/* RAM is backed by an mmapped file. */
#define RAM_FILE (1 << 3)
94 #endif
96 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
97 /* current CPU in the current thread. It is only valid inside
98 cpu_exec() */
99 __thread CPUState *current_cpu;
100 /* 0 = Do not count executed instructions.
101 1 = Precise instruction counting.
102 2 = Adaptive rate instruction counting. */
103 int use_icount;
105 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the physical page map: two bit-fields sharing 32 bits. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
116 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
118 /* Size of the L2 (and L3, etc) page tables. */
119 #define ADDR_SPACE_BITS 64
121 #define P_L2_BITS 9
122 #define P_L2_SIZE (1 << P_L2_BITS)
124 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
126 typedef PhysPageEntry Node[P_L2_SIZE];
128 typedef struct PhysPageMap {
129 struct rcu_head rcu;
131 unsigned sections_nb;
132 unsigned sections_nb_alloc;
133 unsigned nodes_nb;
134 unsigned nodes_nb_alloc;
135 Node *nodes;
136 MemoryRegionSection *sections;
137 } PhysPageMap;
139 struct AddressSpaceDispatch {
140 struct rcu_head rcu;
142 /* This is a multi-level map on the physical address space.
143 * The bottom level has pointers to MemoryRegionSections.
145 PhysPageEntry phys_map;
146 PhysPageMap map;
147 AddressSpace *as;
150 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
151 typedef struct subpage_t {
152 MemoryRegion iomem;
153 AddressSpace *as;
154 hwaddr base;
155 uint16_t sub_section[TARGET_PAGE_SIZE];
156 } subpage_t;
158 #define PHYS_SECTION_UNASSIGNED 0
159 #define PHYS_SECTION_NOTDIRTY 1
160 #define PHYS_SECTION_ROM 2
161 #define PHYS_SECTION_WATCH 3
163 static void io_mem_init(void);
164 static void memory_map_init(void);
165 static void tcg_commit(MemoryListener *listener);
167 static MemoryRegion io_mem_watch;
170 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
171 * @cpu: the CPU whose AddressSpace this is
172 * @as: the AddressSpace itself
173 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
174 * @tcg_as_listener: listener for tracking changes to the AddressSpace
176 struct CPUAddressSpace {
177 CPUState *cpu;
178 AddressSpace *as;
179 struct AddressSpaceDispatch *memory_dispatch;
180 MemoryListener tcg_as_listener;
183 #endif
185 #if !defined(CONFIG_USER_ONLY)
187 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
189 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
190 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
191 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
192 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
196 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
198 unsigned i;
199 uint32_t ret;
200 PhysPageEntry e;
201 PhysPageEntry *p;
203 ret = map->nodes_nb++;
204 p = map->nodes[ret];
205 assert(ret != PHYS_MAP_NODE_NIL);
206 assert(ret != map->nodes_nb_alloc);
208 e.skip = leaf ? 0 : 1;
209 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
210 for (i = 0; i < P_L2_SIZE; ++i) {
211 memcpy(&p[i], &e, sizeof(e));
213 return ret;
216 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
217 hwaddr *index, hwaddr *nb, uint16_t leaf,
218 int level)
220 PhysPageEntry *p;
221 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
223 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
224 lp->ptr = phys_map_node_alloc(map, level == 0);
226 p = map->nodes[lp->ptr];
227 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
229 while (*nb && lp < &p[P_L2_SIZE]) {
230 if ((*index & (step - 1)) == 0 && *nb >= step) {
231 lp->skip = 0;
232 lp->ptr = leaf;
233 *index += step;
234 *nb -= step;
235 } else {
236 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
238 ++lp;
242 static void phys_page_set(AddressSpaceDispatch *d,
243 hwaddr index, hwaddr nb,
244 uint16_t leaf)
246 /* Wildly overreserve - it doesn't matter much. */
247 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
249 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
252 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
253 * and update our entry so we can skip it and go directly to the destination.
255 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
257 unsigned valid_ptr = P_L2_SIZE;
258 int valid = 0;
259 PhysPageEntry *p;
260 int i;
262 if (lp->ptr == PHYS_MAP_NODE_NIL) {
263 return;
266 p = nodes[lp->ptr];
267 for (i = 0; i < P_L2_SIZE; i++) {
268 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
269 continue;
272 valid_ptr = i;
273 valid++;
274 if (p[i].skip) {
275 phys_page_compact(&p[i], nodes, compacted);
279 /* We can only compress if there's only one child. */
280 if (valid != 1) {
281 return;
284 assert(valid_ptr < P_L2_SIZE);
286 /* Don't compress if it won't fit in the # of bits we have. */
287 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
288 return;
291 lp->ptr = p[valid_ptr].ptr;
292 if (!p[valid_ptr].skip) {
293 /* If our only child is a leaf, make this a leaf. */
294 /* By design, we should have made this node a leaf to begin with so we
295 * should never reach here.
296 * But since it's so simple to handle this, let's do it just in case we
297 * change this rule.
299 lp->skip = 0;
300 } else {
301 lp->skip += p[valid_ptr].skip;
305 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
307 DECLARE_BITMAP(compacted, nodes_nb);
309 if (d->phys_map.skip) {
310 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
314 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
315 Node *nodes, MemoryRegionSection *sections)
317 PhysPageEntry *p;
318 hwaddr index = addr >> TARGET_PAGE_BITS;
319 int i;
321 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
322 if (lp.ptr == PHYS_MAP_NODE_NIL) {
323 return &sections[PHYS_SECTION_UNASSIGNED];
325 p = nodes[lp.ptr];
326 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
329 if (sections[lp.ptr].size.hi ||
330 range_covers_byte(sections[lp.ptr].offset_within_address_space,
331 sections[lp.ptr].size.lo, addr)) {
332 return &sections[lp.ptr];
333 } else {
334 return &sections[PHYS_SECTION_UNASSIGNED];
338 bool memory_region_is_unassigned(MemoryRegion *mr)
340 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
341 && mr != &io_mem_watch;
344 /* Called from RCU critical section */
345 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
346 hwaddr addr,
347 bool resolve_subpage)
349 MemoryRegionSection *section;
350 subpage_t *subpage;
352 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
353 if (resolve_subpage && section->mr->subpage) {
354 subpage = container_of(section->mr, subpage_t, iomem);
355 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
357 return section;
360 /* Called from RCU critical section */
361 static MemoryRegionSection *
362 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
363 hwaddr *plen, bool resolve_subpage)
365 MemoryRegionSection *section;
366 MemoryRegion *mr;
367 Int128 diff;
369 section = address_space_lookup_region(d, addr, resolve_subpage);
370 /* Compute offset within MemoryRegionSection */
371 addr -= section->offset_within_address_space;
373 /* Compute offset within MemoryRegion */
374 *xlat = addr + section->offset_within_region;
376 mr = section->mr;
378 /* MMIO registers can be expected to perform full-width accesses based only
379 * on their address, without considering adjacent registers that could
380 * decode to completely different MemoryRegions. When such registers
381 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
382 * regions overlap wildly. For this reason we cannot clamp the accesses
383 * here.
385 * If the length is small (as is the case for address_space_ldl/stl),
386 * everything works fine. If the incoming length is large, however,
387 * the caller really has to do the clamping through memory_access_size.
389 if (memory_region_is_ram(mr)) {
390 diff = int128_sub(section->size, int128_make64(addr));
391 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
393 return section;
396 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
398 if (memory_region_is_ram(mr)) {
399 return !(is_write && mr->readonly);
401 if (memory_region_is_romd(mr)) {
402 return !is_write;
405 return false;
408 /* Called from RCU critical section */
409 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
410 hwaddr *xlat, hwaddr *plen,
411 bool is_write)
413 IOMMUTLBEntry iotlb;
414 MemoryRegionSection *section;
415 MemoryRegion *mr;
417 for (;;) {
418 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
419 section = address_space_translate_internal(d, addr, &addr, plen, true);
420 mr = section->mr;
422 if (!mr->iommu_ops) {
423 break;
426 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
427 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
428 | (addr & iotlb.addr_mask));
429 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
430 if (!(iotlb.perm & (1 << is_write))) {
431 mr = &io_mem_unassigned;
432 break;
435 as = iotlb.target_as;
438 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
439 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
440 *plen = MIN(page, *plen);
443 *xlat = addr;
444 return mr;
447 /* Called from RCU critical section */
448 MemoryRegionSection *
449 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
450 hwaddr *xlat, hwaddr *plen)
452 MemoryRegionSection *section;
453 section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
454 addr, xlat, plen, false);
456 assert(!section->mr->iommu_ops);
457 return section;
459 #endif
461 #if !defined(CONFIG_USER_ONLY)
463 static int cpu_common_post_load(void *opaque, int version_id)
465 CPUState *cpu = opaque;
467 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
468 version_id is increased. */
469 cpu->interrupt_request &= ~0x01;
470 tlb_flush(cpu, 1);
472 return 0;
475 static int cpu_common_pre_load(void *opaque)
477 CPUState *cpu = opaque;
479 cpu->exception_index = -1;
481 return 0;
484 static bool cpu_common_exception_index_needed(void *opaque)
486 CPUState *cpu = opaque;
488 return tcg_enabled() && cpu->exception_index != -1;
491 static const VMStateDescription vmstate_cpu_common_exception_index = {
492 .name = "cpu_common/exception_index",
493 .version_id = 1,
494 .minimum_version_id = 1,
495 .needed = cpu_common_exception_index_needed,
496 .fields = (VMStateField[]) {
497 VMSTATE_INT32(exception_index, CPUState),
498 VMSTATE_END_OF_LIST()
502 static bool cpu_common_crash_occurred_needed(void *opaque)
504 CPUState *cpu = opaque;
506 return cpu->crash_occurred;
509 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
510 .name = "cpu_common/crash_occurred",
511 .version_id = 1,
512 .minimum_version_id = 1,
513 .needed = cpu_common_crash_occurred_needed,
514 .fields = (VMStateField[]) {
515 VMSTATE_BOOL(crash_occurred, CPUState),
516 VMSTATE_END_OF_LIST()
520 const VMStateDescription vmstate_cpu_common = {
521 .name = "cpu_common",
522 .version_id = 1,
523 .minimum_version_id = 1,
524 .pre_load = cpu_common_pre_load,
525 .post_load = cpu_common_post_load,
526 .fields = (VMStateField[]) {
527 VMSTATE_UINT32(halted, CPUState),
528 VMSTATE_UINT32(interrupt_request, CPUState),
529 VMSTATE_END_OF_LIST()
531 .subsections = (const VMStateDescription*[]) {
532 &vmstate_cpu_common_exception_index,
533 &vmstate_cpu_common_crash_occurred,
534 NULL
538 #endif
540 CPUState *qemu_get_cpu(int index)
542 CPUState *cpu;
544 CPU_FOREACH(cpu) {
545 if (cpu->cpu_index == index) {
546 return cpu;
550 return NULL;
553 #if !defined(CONFIG_USER_ONLY)
554 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
556 /* We only support one address space per cpu at the moment. */
557 assert(cpu->as == as);
559 if (cpu->cpu_ases) {
560 /* We've already registered the listener for our only AS */
561 return;
564 cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
565 cpu->cpu_ases[0].cpu = cpu;
566 cpu->cpu_ases[0].as = as;
567 cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
568 memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
570 #endif
572 #ifndef CONFIG_USER_ONLY
573 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
575 static int cpu_get_free_index(Error **errp)
577 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
579 if (cpu >= MAX_CPUMASK_BITS) {
580 error_setg(errp, "Trying to use more CPUs than max of %d",
581 MAX_CPUMASK_BITS);
582 return -1;
585 bitmap_set(cpu_index_map, cpu, 1);
586 return cpu;
589 void cpu_exec_exit(CPUState *cpu)
591 if (cpu->cpu_index == -1) {
592 /* cpu_index was never allocated by this @cpu or was already freed. */
593 return;
596 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
597 cpu->cpu_index = -1;
599 #else
601 static int cpu_get_free_index(Error **errp)
603 CPUState *some_cpu;
604 int cpu_index = 0;
606 CPU_FOREACH(some_cpu) {
607 cpu_index++;
609 return cpu_index;
612 void cpu_exec_exit(CPUState *cpu)
615 #endif
617 void cpu_exec_init(CPUState *cpu, Error **errp)
619 CPUClass *cc = CPU_GET_CLASS(cpu);
620 int cpu_index;
621 Error *local_err = NULL;
623 #ifndef CONFIG_USER_ONLY
624 cpu->as = &address_space_memory;
625 cpu->thread_id = qemu_get_thread_id();
626 #endif
628 #if defined(CONFIG_USER_ONLY)
629 cpu_list_lock();
630 #endif
631 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
632 if (local_err) {
633 error_propagate(errp, local_err);
634 #if defined(CONFIG_USER_ONLY)
635 cpu_list_unlock();
636 #endif
637 return;
639 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
640 #if defined(CONFIG_USER_ONLY)
641 cpu_list_unlock();
642 #endif
643 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
644 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
646 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
647 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
648 cpu_save, cpu_load, cpu->env_ptr);
649 assert(cc->vmsd == NULL);
650 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
651 #endif
652 if (cc->vmsd != NULL) {
653 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
657 #if defined(CONFIG_USER_ONLY)
658 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
660 tb_invalidate_phys_page_range(pc, pc + 1, 0);
662 #else
663 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
665 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
666 if (phys != -1) {
667 tb_invalidate_phys_addr(cpu->as,
668 phys | (pc & ~TARGET_PAGE_MASK));
671 #endif
673 #if defined(CONFIG_USER_ONLY)
674 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
679 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
680 int flags)
682 return -ENOSYS;
685 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
689 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
690 int flags, CPUWatchpoint **watchpoint)
692 return -ENOSYS;
694 #else
695 /* Add a watchpoint. */
696 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
697 int flags, CPUWatchpoint **watchpoint)
699 CPUWatchpoint *wp;
701 /* forbid ranges which are empty or run off the end of the address space */
702 if (len == 0 || (addr + len - 1) < addr) {
703 error_report("tried to set invalid watchpoint at %"
704 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
705 return -EINVAL;
707 wp = g_malloc(sizeof(*wp));
709 wp->vaddr = addr;
710 wp->len = len;
711 wp->flags = flags;
713 /* keep all GDB-injected watchpoints in front */
714 if (flags & BP_GDB) {
715 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
716 } else {
717 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
720 tlb_flush_page(cpu, addr);
722 if (watchpoint)
723 *watchpoint = wp;
724 return 0;
727 /* Remove a specific watchpoint. */
728 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
729 int flags)
731 CPUWatchpoint *wp;
733 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
734 if (addr == wp->vaddr && len == wp->len
735 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
736 cpu_watchpoint_remove_by_ref(cpu, wp);
737 return 0;
740 return -ENOENT;
743 /* Remove a specific watchpoint by reference. */
744 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
746 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
748 tlb_flush_page(cpu, watchpoint->vaddr);
750 g_free(watchpoint);
753 /* Remove all matching watchpoints. */
754 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
756 CPUWatchpoint *wp, *next;
758 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
759 if (wp->flags & mask) {
760 cpu_watchpoint_remove_by_ref(cpu, wp);
765 /* Return true if this watchpoint address matches the specified
766 * access (ie the address range covered by the watchpoint overlaps
767 * partially or completely with the address range covered by the
768 * access).
770 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
771 vaddr addr,
772 vaddr len)
774 /* We know the lengths are non-zero, but a little caution is
775 * required to avoid errors in the case where the range ends
776 * exactly at the top of the address space and so addr + len
777 * wraps round to zero.
779 vaddr wpend = wp->vaddr + wp->len - 1;
780 vaddr addrend = addr + len - 1;
782 return !(addr > wpend || wp->vaddr > addrend);
785 #endif
787 /* Add a breakpoint. */
788 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
789 CPUBreakpoint **breakpoint)
791 CPUBreakpoint *bp;
793 bp = g_malloc(sizeof(*bp));
795 bp->pc = pc;
796 bp->flags = flags;
798 /* keep all GDB-injected breakpoints in front */
799 if (flags & BP_GDB) {
800 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
801 } else {
802 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
805 breakpoint_invalidate(cpu, pc);
807 if (breakpoint) {
808 *breakpoint = bp;
810 return 0;
813 /* Remove a specific breakpoint. */
814 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
816 CPUBreakpoint *bp;
818 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
819 if (bp->pc == pc && bp->flags == flags) {
820 cpu_breakpoint_remove_by_ref(cpu, bp);
821 return 0;
824 return -ENOENT;
827 /* Remove a specific breakpoint by reference. */
828 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
830 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
832 breakpoint_invalidate(cpu, breakpoint->pc);
834 g_free(breakpoint);
837 /* Remove all matching breakpoints. */
838 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
840 CPUBreakpoint *bp, *next;
842 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
843 if (bp->flags & mask) {
844 cpu_breakpoint_remove_by_ref(cpu, bp);
849 /* enable or disable single step mode. EXCP_DEBUG is returned by the
850 CPU loop after each instruction */
851 void cpu_single_step(CPUState *cpu, int enabled)
853 if (cpu->singlestep_enabled != enabled) {
854 cpu->singlestep_enabled = enabled;
855 if (kvm_enabled()) {
856 kvm_update_guest_debug(cpu, 0);
857 } else {
858 /* must flush all the translated code to avoid inconsistencies */
859 /* XXX: only flush what is necessary */
860 tb_flush(cpu);
865 void cpu_abort(CPUState *cpu, const char *fmt, ...)
867 va_list ap;
868 va_list ap2;
870 va_start(ap, fmt);
871 va_copy(ap2, ap);
872 fprintf(stderr, "qemu: fatal: ");
873 vfprintf(stderr, fmt, ap);
874 fprintf(stderr, "\n");
875 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
876 if (qemu_log_enabled()) {
877 qemu_log("qemu: fatal: ");
878 qemu_log_vprintf(fmt, ap2);
879 qemu_log("\n");
880 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
881 qemu_log_flush();
882 qemu_log_close();
884 va_end(ap2);
885 va_end(ap);
886 replay_finish();
887 #if defined(CONFIG_USER_ONLY)
889 struct sigaction act;
890 sigfillset(&act.sa_mask);
891 act.sa_handler = SIG_DFL;
892 sigaction(SIGABRT, &act, NULL);
894 #endif
895 abort();
898 #if !defined(CONFIG_USER_ONLY)
899 /* Called from RCU critical section */
900 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
902 RAMBlock *block;
904 block = atomic_rcu_read(&ram_list.mru_block);
905 if (block && addr - block->offset < block->max_length) {
906 return block;
908 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
909 if (addr - block->offset < block->max_length) {
910 goto found;
914 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
915 abort();
917 found:
918 /* It is safe to write mru_block outside the iothread lock. This
919 * is what happens:
921 * mru_block = xxx
922 * rcu_read_unlock()
923 * xxx removed from list
924 * rcu_read_lock()
925 * read mru_block
926 * mru_block = NULL;
927 * call_rcu(reclaim_ramblock, xxx);
928 * rcu_read_unlock()
930 * atomic_rcu_set is not needed here. The block was already published
931 * when it was placed into the list. Here we're just making an extra
932 * copy of the pointer.
934 ram_list.mru_block = block;
935 return block;
938 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
940 CPUState *cpu;
941 ram_addr_t start1;
942 RAMBlock *block;
943 ram_addr_t end;
945 end = TARGET_PAGE_ALIGN(start + length);
946 start &= TARGET_PAGE_MASK;
948 rcu_read_lock();
949 block = qemu_get_ram_block(start);
950 assert(block == qemu_get_ram_block(end - 1));
951 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
952 CPU_FOREACH(cpu) {
953 tlb_reset_dirty(cpu, start1, length);
955 rcu_read_unlock();
958 /* Note: start and end must be within the same ram block. */
959 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
960 ram_addr_t length,
961 unsigned client)
963 unsigned long end, page;
964 bool dirty;
966 if (length == 0) {
967 return false;
970 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
971 page = start >> TARGET_PAGE_BITS;
972 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
973 page, end - page);
975 if (dirty && tcg_enabled()) {
976 tlb_reset_dirty_range_all(start, length);
979 return dirty;
982 /* Called from RCU critical section */
983 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
984 MemoryRegionSection *section,
985 target_ulong vaddr,
986 hwaddr paddr, hwaddr xlat,
987 int prot,
988 target_ulong *address)
990 hwaddr iotlb;
991 CPUWatchpoint *wp;
993 if (memory_region_is_ram(section->mr)) {
994 /* Normal RAM. */
995 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
996 + xlat;
997 if (!section->readonly) {
998 iotlb |= PHYS_SECTION_NOTDIRTY;
999 } else {
1000 iotlb |= PHYS_SECTION_ROM;
1002 } else {
1003 AddressSpaceDispatch *d;
1005 d = atomic_rcu_read(&section->address_space->dispatch);
1006 iotlb = section - d->map.sections;
1007 iotlb += xlat;
1010 /* Make accesses to pages with watchpoints go via the
1011 watchpoint trap routines. */
1012 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1013 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1014 /* Avoid trapping reads of pages with a write breakpoint. */
1015 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1016 iotlb = PHYS_SECTION_WATCH + paddr;
1017 *address |= TLB_MMIO;
1018 break;
1023 return iotlb;
#endif /* !defined(CONFIG_USER_ONLY) */
1027 #if !defined(CONFIG_USER_ONLY)
1029 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1030 uint16_t section);
1031 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1033 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1034 qemu_anon_ram_alloc;
1037 * Set a custom physical guest memory alloator.
1038 * Accelerators with unusual needs may need this. Hopefully, we can
1039 * get rid of it eventually.
1041 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1043 phys_mem_alloc = alloc;
1046 static uint16_t phys_section_add(PhysPageMap *map,
1047 MemoryRegionSection *section)
1049 /* The physical section number is ORed with a page-aligned
1050 * pointer to produce the iotlb entries. Thus it should
1051 * never overflow into the page-aligned value.
1053 assert(map->sections_nb < TARGET_PAGE_SIZE);
1055 if (map->sections_nb == map->sections_nb_alloc) {
1056 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1057 map->sections = g_renew(MemoryRegionSection, map->sections,
1058 map->sections_nb_alloc);
1060 map->sections[map->sections_nb] = *section;
1061 memory_region_ref(section->mr);
1062 return map->sections_nb++;
1065 static void phys_section_destroy(MemoryRegion *mr)
1067 memory_region_unref(mr);
1069 if (mr->subpage) {
1070 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1071 object_unref(OBJECT(&subpage->iomem));
1072 g_free(subpage);
1076 static void phys_sections_free(PhysPageMap *map)
1078 while (map->sections_nb > 0) {
1079 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1080 phys_section_destroy(section->mr);
1082 g_free(map->sections);
1083 g_free(map->nodes);
1086 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1088 subpage_t *subpage;
1089 hwaddr base = section->offset_within_address_space
1090 & TARGET_PAGE_MASK;
1091 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1092 d->map.nodes, d->map.sections);
1093 MemoryRegionSection subsection = {
1094 .offset_within_address_space = base,
1095 .size = int128_make64(TARGET_PAGE_SIZE),
1097 hwaddr start, end;
1099 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1101 if (!(existing->mr->subpage)) {
1102 subpage = subpage_init(d->as, base);
1103 subsection.address_space = d->as;
1104 subsection.mr = &subpage->iomem;
1105 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1106 phys_section_add(&d->map, &subsection));
1107 } else {
1108 subpage = container_of(existing->mr, subpage_t, iomem);
1110 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1111 end = start + int128_get64(section->size) - 1;
1112 subpage_register(subpage, start, end,
1113 phys_section_add(&d->map, section));
1117 static void register_multipage(AddressSpaceDispatch *d,
1118 MemoryRegionSection *section)
1120 hwaddr start_addr = section->offset_within_address_space;
1121 uint16_t section_index = phys_section_add(&d->map, section);
1122 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1123 TARGET_PAGE_BITS));
1125 assert(num_pages);
1126 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1129 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1131 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1132 AddressSpaceDispatch *d = as->next_dispatch;
1133 MemoryRegionSection now = *section, remain = *section;
1134 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1136 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1137 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1138 - now.offset_within_address_space;
1140 now.size = int128_min(int128_make64(left), now.size);
1141 register_subpage(d, &now);
1142 } else {
1143 now.size = int128_zero();
1145 while (int128_ne(remain.size, now.size)) {
1146 remain.size = int128_sub(remain.size, now.size);
1147 remain.offset_within_address_space += int128_get64(now.size);
1148 remain.offset_within_region += int128_get64(now.size);
1149 now = remain;
1150 if (int128_lt(remain.size, page_size)) {
1151 register_subpage(d, &now);
1152 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1153 now.size = page_size;
1154 register_subpage(d, &now);
1155 } else {
1156 now.size = int128_and(now.size, int128_neg(page_size));
1157 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; does nothing unless KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1168 void qemu_mutex_lock_ramlist(void)
1170 qemu_mutex_lock(&ram_list.mutex);
1173 void qemu_mutex_unlock_ramlist(void)
1175 qemu_mutex_unlock(&ram_list.mutex);
1178 #ifdef __linux__
1180 #include <sys/vfs.h>
1182 #define HUGETLBFS_MAGIC 0x958458f6
1184 static long gethugepagesize(const char *path, Error **errp)
1186 struct statfs fs;
1187 int ret;
1189 do {
1190 ret = statfs(path, &fs);
1191 } while (ret != 0 && errno == EINTR);
1193 if (ret != 0) {
1194 error_setg_errno(errp, errno, "failed to get page size of file %s",
1195 path);
1196 return 0;
1199 if (fs.f_type != HUGETLBFS_MAGIC)
1200 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1202 return fs.f_bsize;
1205 static void *file_ram_alloc(RAMBlock *block,
1206 ram_addr_t memory,
1207 const char *path,
1208 Error **errp)
1210 struct stat st;
1211 char *filename;
1212 char *sanitized_name;
1213 char *c;
1214 void *area;
1215 int fd;
1216 uint64_t hpagesize;
1217 Error *local_err = NULL;
1219 hpagesize = gethugepagesize(path, &local_err);
1220 if (local_err) {
1221 error_propagate(errp, local_err);
1222 goto error;
1224 block->mr->align = hpagesize;
1226 if (memory < hpagesize) {
1227 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1228 "or larger than huge page size 0x%" PRIx64,
1229 memory, hpagesize);
1230 goto error;
1233 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1234 error_setg(errp,
1235 "host lacks kvm mmu notifiers, -mem-path unsupported");
1236 goto error;
1239 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1240 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1241 sanitized_name = g_strdup(memory_region_name(block->mr));
1242 for (c = sanitized_name; *c != '\0'; c++) {
1243 if (*c == '/') {
1244 *c = '_';
1248 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1249 sanitized_name);
1250 g_free(sanitized_name);
1252 fd = mkstemp(filename);
1253 if (fd >= 0) {
1254 unlink(filename);
1256 g_free(filename);
1257 } else {
1258 fd = open(path, O_RDWR | O_CREAT, 0644);
1261 if (fd < 0) {
1262 error_setg_errno(errp, errno,
1263 "unable to create backing store for hugepages");
1264 goto error;
1267 memory = ROUND_UP(memory, hpagesize);
1270 * ftruncate is not supported by hugetlbfs in older
1271 * hosts, so don't bother bailing out on errors.
1272 * If anything goes wrong with it under other filesystems,
1273 * mmap will fail.
1275 if (ftruncate(fd, memory)) {
1276 perror("ftruncate");
1279 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1280 if (area == MAP_FAILED) {
1281 error_setg_errno(errp, errno,
1282 "unable to map backing store for hugepages");
1283 close(fd);
1284 goto error;
1287 if (mem_prealloc) {
1288 os_mem_prealloc(fd, area, memory);
1291 block->fd = fd;
1292 return area;
1294 error:
1295 return NULL;
1297 #endif
1299 /* Called with the ramlist lock held. */
1300 static ram_addr_t find_ram_offset(ram_addr_t size)
1302 RAMBlock *block, *next_block;
1303 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1305 assert(size != 0); /* it would hand out same offset multiple times */
1307 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1308 return 0;
1311 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1312 ram_addr_t end, next = RAM_ADDR_MAX;
1314 end = block->offset + block->max_length;
1316 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1317 if (next_block->offset >= end) {
1318 next = MIN(next, next_block->offset);
1321 if (next - end >= size && next - end < mingap) {
1322 offset = end;
1323 mingap = next - end;
1327 if (offset == RAM_ADDR_MAX) {
1328 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1329 (uint64_t)size);
1330 abort();
1333 return offset;
1336 ram_addr_t last_ram_offset(void)
1338 RAMBlock *block;
1339 ram_addr_t last = 0;
1341 rcu_read_lock();
1342 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1343 last = MAX(last, block->offset + block->max_length);
1345 rcu_read_unlock();
1346 return last;
1349 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1351 int ret;
1353 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1354 if (!machine_dump_guest_core(current_machine)) {
1355 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1356 if (ret) {
1357 perror("qemu_madvise");
1358 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1359 "but dump_guest_core=off specified\n");
1364 /* Called within an RCU critical section, or while the ramlist lock
1365 * is held.
1367 static RAMBlock *find_ram_block(ram_addr_t addr)
1369 RAMBlock *block;
1371 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1372 if (block->offset == addr) {
1373 return block;
1377 return NULL;
1380 const char *qemu_ram_get_idstr(RAMBlock *rb)
1382 return rb->idstr;
1385 /* Called with iothread lock held. */
1386 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1388 RAMBlock *new_block, *block;
1390 rcu_read_lock();
1391 new_block = find_ram_block(addr);
1392 assert(new_block);
1393 assert(!new_block->idstr[0]);
1395 if (dev) {
1396 char *id = qdev_get_dev_path(dev);
1397 if (id) {
1398 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1399 g_free(id);
1402 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1404 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1405 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1406 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1407 new_block->idstr);
1408 abort();
1411 rcu_read_unlock();
1414 /* Called with iothread lock held. */
1415 void qemu_ram_unset_idstr(ram_addr_t addr)
1417 RAMBlock *block;
1419 /* FIXME: arch_init.c assumes that this is not called throughout
1420 * migration. Ignore the problem since hot-unplug during migration
1421 * does not work anyway.
1424 rcu_read_lock();
1425 block = find_ram_block(addr);
1426 if (block) {
1427 memset(block->idstr, 0, sizeof(block->idstr));
1429 rcu_read_unlock();
1432 static int memory_try_enable_merging(void *addr, size_t len)
1434 if (!machine_mem_merge(current_machine)) {
1435 /* disabled by the user */
1436 return 0;
1439 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1442 /* Only legal before guest might have detected the memory size: e.g. on
1443 * incoming migration, or right after reset.
1445 * As memory core doesn't know how is memory accessed, it is up to
1446 * resize callback to update device state and/or add assertions to detect
1447 * misuse, if necessary.
1449 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1451 RAMBlock *block = find_ram_block(base);
1453 assert(block);
1455 newsize = HOST_PAGE_ALIGN(newsize);
1457 if (block->used_length == newsize) {
1458 return 0;
1461 if (!(block->flags & RAM_RESIZEABLE)) {
1462 error_setg_errno(errp, EINVAL,
1463 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1464 " in != 0x" RAM_ADDR_FMT, block->idstr,
1465 newsize, block->used_length);
1466 return -EINVAL;
1469 if (block->max_length < newsize) {
1470 error_setg_errno(errp, EINVAL,
1471 "Length too large: %s: 0x" RAM_ADDR_FMT
1472 " > 0x" RAM_ADDR_FMT, block->idstr,
1473 newsize, block->max_length);
1474 return -EINVAL;
1477 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1478 block->used_length = newsize;
1479 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1480 DIRTY_CLIENTS_ALL);
1481 memory_region_set_size(block->mr, newsize);
1482 if (block->resized) {
1483 block->resized(block->idstr, newsize, block->host);
1485 return 0;
1488 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1490 RAMBlock *block;
1491 RAMBlock *last_block = NULL;
1492 ram_addr_t old_ram_size, new_ram_size;
1494 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1496 qemu_mutex_lock_ramlist();
1497 new_block->offset = find_ram_offset(new_block->max_length);
1499 if (!new_block->host) {
1500 if (xen_enabled()) {
1501 xen_ram_alloc(new_block->offset, new_block->max_length,
1502 new_block->mr);
1503 } else {
1504 new_block->host = phys_mem_alloc(new_block->max_length,
1505 &new_block->mr->align);
1506 if (!new_block->host) {
1507 error_setg_errno(errp, errno,
1508 "cannot set up guest memory '%s'",
1509 memory_region_name(new_block->mr));
1510 qemu_mutex_unlock_ramlist();
1511 return -1;
1513 memory_try_enable_merging(new_block->host, new_block->max_length);
1517 new_ram_size = MAX(old_ram_size,
1518 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1519 if (new_ram_size > old_ram_size) {
1520 migration_bitmap_extend(old_ram_size, new_ram_size);
1522 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1523 * QLIST (which has an RCU-friendly variant) does not have insertion at
1524 * tail, so save the last element in last_block.
1526 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1527 last_block = block;
1528 if (block->max_length < new_block->max_length) {
1529 break;
1532 if (block) {
1533 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1534 } else if (last_block) {
1535 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1536 } else { /* list is empty */
1537 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1539 ram_list.mru_block = NULL;
1541 /* Write list before version */
1542 smp_wmb();
1543 ram_list.version++;
1544 qemu_mutex_unlock_ramlist();
1546 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1548 if (new_ram_size > old_ram_size) {
1549 int i;
1551 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1552 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1553 ram_list.dirty_memory[i] =
1554 bitmap_zero_extend(ram_list.dirty_memory[i],
1555 old_ram_size, new_ram_size);
1558 cpu_physical_memory_set_dirty_range(new_block->offset,
1559 new_block->used_length,
1560 DIRTY_CLIENTS_ALL);
1562 if (new_block->host) {
1563 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1564 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1565 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1566 if (kvm_enabled()) {
1567 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1571 return new_block->offset;
1574 #ifdef __linux__
1575 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1576 bool share, const char *mem_path,
1577 Error **errp)
1579 RAMBlock *new_block;
1580 ram_addr_t addr;
1581 Error *local_err = NULL;
1583 if (xen_enabled()) {
1584 error_setg(errp, "-mem-path not supported with Xen");
1585 return -1;
1588 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1590 * file_ram_alloc() needs to allocate just like
1591 * phys_mem_alloc, but we haven't bothered to provide
1592 * a hook there.
1594 error_setg(errp,
1595 "-mem-path not supported with this accelerator");
1596 return -1;
1599 size = HOST_PAGE_ALIGN(size);
1600 new_block = g_malloc0(sizeof(*new_block));
1601 new_block->mr = mr;
1602 new_block->used_length = size;
1603 new_block->max_length = size;
1604 new_block->flags = share ? RAM_SHARED : 0;
1605 new_block->flags |= RAM_FILE;
1606 new_block->host = file_ram_alloc(new_block, size,
1607 mem_path, errp);
1608 if (!new_block->host) {
1609 g_free(new_block);
1610 return -1;
1613 addr = ram_block_add(new_block, &local_err);
1614 if (local_err) {
1615 g_free(new_block);
1616 error_propagate(errp, local_err);
1617 return -1;
1619 return addr;
1621 #endif
1623 static
1624 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1625 void (*resized)(const char*,
1626 uint64_t length,
1627 void *host),
1628 void *host, bool resizeable,
1629 MemoryRegion *mr, Error **errp)
1631 RAMBlock *new_block;
1632 ram_addr_t addr;
1633 Error *local_err = NULL;
1635 size = HOST_PAGE_ALIGN(size);
1636 max_size = HOST_PAGE_ALIGN(max_size);
1637 new_block = g_malloc0(sizeof(*new_block));
1638 new_block->mr = mr;
1639 new_block->resized = resized;
1640 new_block->used_length = size;
1641 new_block->max_length = max_size;
1642 assert(max_size >= size);
1643 new_block->fd = -1;
1644 new_block->host = host;
1645 if (host) {
1646 new_block->flags |= RAM_PREALLOC;
1648 if (resizeable) {
1649 new_block->flags |= RAM_RESIZEABLE;
1651 addr = ram_block_add(new_block, &local_err);
1652 if (local_err) {
1653 g_free(new_block);
1654 error_propagate(errp, local_err);
1655 return -1;
1657 return addr;
1660 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1661 MemoryRegion *mr, Error **errp)
1663 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1666 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1668 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1671 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1672 void (*resized)(const char*,
1673 uint64_t length,
1674 void *host),
1675 MemoryRegion *mr, Error **errp)
1677 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1680 void qemu_ram_free_from_ptr(ram_addr_t addr)
1682 RAMBlock *block;
1684 qemu_mutex_lock_ramlist();
1685 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1686 if (addr == block->offset) {
1687 QLIST_REMOVE_RCU(block, next);
1688 ram_list.mru_block = NULL;
1689 /* Write list before version */
1690 smp_wmb();
1691 ram_list.version++;
1692 g_free_rcu(block, rcu);
1693 break;
1696 qemu_mutex_unlock_ramlist();
1699 static void reclaim_ramblock(RAMBlock *block)
1701 if (block->flags & RAM_PREALLOC) {
1703 } else if (xen_enabled()) {
1704 xen_invalidate_map_cache_entry(block->host);
1705 #ifndef _WIN32
1706 } else if (block->fd >= 0) {
1707 if (block->flags & RAM_FILE) {
1708 qemu_ram_munmap(block->host, block->max_length);
1709 } else {
1710 munmap(block->host, block->max_length);
1712 close(block->fd);
1713 #endif
1714 } else {
1715 qemu_anon_ram_free(block->host, block->max_length);
1717 g_free(block);
1720 void qemu_ram_free(ram_addr_t addr)
1722 RAMBlock *block;
1724 qemu_mutex_lock_ramlist();
1725 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1726 if (addr == block->offset) {
1727 QLIST_REMOVE_RCU(block, next);
1728 ram_list.mru_block = NULL;
1729 /* Write list before version */
1730 smp_wmb();
1731 ram_list.version++;
1732 call_rcu(block, reclaim_ramblock, rcu);
1733 break;
1736 qemu_mutex_unlock_ramlist();
1739 #ifndef _WIN32
1740 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1742 RAMBlock *block;
1743 ram_addr_t offset;
1744 int flags;
1745 void *area, *vaddr;
1747 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1748 offset = addr - block->offset;
1749 if (offset < block->max_length) {
1750 vaddr = ramblock_ptr(block, offset);
1751 if (block->flags & RAM_PREALLOC) {
1753 } else if (xen_enabled()) {
1754 abort();
1755 } else {
1756 flags = MAP_FIXED;
1757 if (block->fd >= 0) {
1758 flags |= (block->flags & RAM_SHARED ?
1759 MAP_SHARED : MAP_PRIVATE);
1760 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1761 flags, block->fd, offset);
1762 } else {
1764 * Remap needs to match alloc. Accelerators that
1765 * set phys_mem_alloc never remap. If they did,
1766 * we'd need a remap hook here.
1768 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1770 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1771 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1772 flags, -1, 0);
1774 if (area != vaddr) {
1775 fprintf(stderr, "Could not remap addr: "
1776 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1777 length, addr);
1778 exit(1);
1780 memory_try_enable_merging(vaddr, length);
1781 qemu_ram_setup_dump(vaddr, length);
1786 #endif /* !_WIN32 */
1788 int qemu_get_ram_fd(ram_addr_t addr)
1790 RAMBlock *block;
1791 int fd;
1793 rcu_read_lock();
1794 block = qemu_get_ram_block(addr);
1795 fd = block->fd;
1796 rcu_read_unlock();
1797 return fd;
1800 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1802 RAMBlock *block;
1803 void *ptr;
1805 rcu_read_lock();
1806 block = qemu_get_ram_block(addr);
1807 ptr = ramblock_ptr(block, 0);
1808 rcu_read_unlock();
1809 return ptr;
1812 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1813 * This should not be used for general purpose DMA. Use address_space_map
1814 * or address_space_rw instead. For local memory (e.g. video ram) that the
1815 * device owns, use memory_region_get_ram_ptr.
1817 * By the time this function returns, the returned pointer is not protected
1818 * by RCU anymore. If the caller is not within an RCU critical section and
1819 * does not hold the iothread lock, it must have other means of protecting the
1820 * pointer, such as a reference to the region that includes the incoming
1821 * ram_addr_t.
1823 void *qemu_get_ram_ptr(ram_addr_t addr)
1825 RAMBlock *block;
1826 void *ptr;
1828 rcu_read_lock();
1829 block = qemu_get_ram_block(addr);
1831 if (xen_enabled() && block->host == NULL) {
1832 /* We need to check if the requested address is in the RAM
1833 * because we don't want to map the entire memory in QEMU.
1834 * In that case just map until the end of the page.
1836 if (block->offset == 0) {
1837 ptr = xen_map_cache(addr, 0, 0);
1838 goto unlock;
1841 block->host = xen_map_cache(block->offset, block->max_length, 1);
1843 ptr = ramblock_ptr(block, addr - block->offset);
1845 unlock:
1846 rcu_read_unlock();
1847 return ptr;
1850 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1851 * but takes a size argument.
1853 * By the time this function returns, the returned pointer is not protected
1854 * by RCU anymore. If the caller is not within an RCU critical section and
1855 * does not hold the iothread lock, it must have other means of protecting the
1856 * pointer, such as a reference to the region that includes the incoming
1857 * ram_addr_t.
1859 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1861 void *ptr;
1862 if (*size == 0) {
1863 return NULL;
1865 if (xen_enabled()) {
1866 return xen_map_cache(addr, *size, 1);
1867 } else {
1868 RAMBlock *block;
1869 rcu_read_lock();
1870 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1871 if (addr - block->offset < block->max_length) {
1872 if (addr - block->offset + *size > block->max_length)
1873 *size = block->max_length - addr + block->offset;
1874 ptr = ramblock_ptr(block, addr - block->offset);
1875 rcu_read_unlock();
1876 return ptr;
1880 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1881 abort();
1886 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1887 * in that RAMBlock.
1889 * ptr: Host pointer to look up
1890 * round_offset: If true round the result offset down to a page boundary
1891 * *ram_addr: set to result ram_addr
1892 * *offset: set to result offset within the RAMBlock
1894 * Returns: RAMBlock (or NULL if not found)
1896 * By the time this function returns, the returned pointer is not protected
1897 * by RCU anymore. If the caller is not within an RCU critical section and
1898 * does not hold the iothread lock, it must have other means of protecting the
1899 * pointer, such as a reference to the region that includes the incoming
1900 * ram_addr_t.
1902 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1903 ram_addr_t *ram_addr,
1904 ram_addr_t *offset)
1906 RAMBlock *block;
1907 uint8_t *host = ptr;
1909 if (xen_enabled()) {
1910 rcu_read_lock();
1911 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1912 block = qemu_get_ram_block(*ram_addr);
1913 if (block) {
1914 *offset = (host - block->host);
1916 rcu_read_unlock();
1917 return block;
1920 rcu_read_lock();
1921 block = atomic_rcu_read(&ram_list.mru_block);
1922 if (block && block->host && host - block->host < block->max_length) {
1923 goto found;
1926 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1927 /* This case append when the block is not mapped. */
1928 if (block->host == NULL) {
1929 continue;
1931 if (host - block->host < block->max_length) {
1932 goto found;
1936 rcu_read_unlock();
1937 return NULL;
1939 found:
1940 *offset = (host - block->host);
1941 if (round_offset) {
1942 *offset &= TARGET_PAGE_MASK;
1944 *ram_addr = block->offset + *offset;
1945 rcu_read_unlock();
1946 return block;
1950 * Finds the named RAMBlock
1952 * name: The name of RAMBlock to find
1954 * Returns: RAMBlock (or NULL if not found)
1956 RAMBlock *qemu_ram_block_by_name(const char *name)
1958 RAMBlock *block;
1960 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1961 if (!strcmp(name, block->idstr)) {
1962 return block;
1966 return NULL;
1969 /* Some of the softmmu routines need to translate from a host pointer
1970 (typically a TLB entry) back to a ram offset. */
1971 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1973 RAMBlock *block;
1974 ram_addr_t offset; /* Not used */
1976 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
1978 if (!block) {
1979 return NULL;
1982 return block->mr;
1985 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1986 uint64_t val, unsigned size)
1988 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1989 tb_invalidate_phys_page_fast(ram_addr, size);
1991 switch (size) {
1992 case 1:
1993 stb_p(qemu_get_ram_ptr(ram_addr), val);
1994 break;
1995 case 2:
1996 stw_p(qemu_get_ram_ptr(ram_addr), val);
1997 break;
1998 case 4:
1999 stl_p(qemu_get_ram_ptr(ram_addr), val);
2000 break;
2001 default:
2002 abort();
2004 /* Set both VGA and migration bits for simplicity and to remove
2005 * the notdirty callback faster.
2007 cpu_physical_memory_set_dirty_range(ram_addr, size,
2008 DIRTY_CLIENTS_NOCODE);
2009 /* we remove the notdirty callback only if the code has been
2010 flushed */
2011 if (!cpu_physical_memory_is_clean(ram_addr)) {
2012 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2016 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2017 unsigned size, bool is_write)
2019 return is_write;
2022 static const MemoryRegionOps notdirty_mem_ops = {
2023 .write = notdirty_mem_write,
2024 .valid.accepts = notdirty_mem_accepts,
2025 .endianness = DEVICE_NATIVE_ENDIAN,
2028 /* Generate a debug exception if a watchpoint has been hit. */
2029 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2031 CPUState *cpu = current_cpu;
2032 CPUArchState *env = cpu->env_ptr;
2033 target_ulong pc, cs_base;
2034 target_ulong vaddr;
2035 CPUWatchpoint *wp;
2036 int cpu_flags;
2038 if (cpu->watchpoint_hit) {
2039 /* We re-entered the check after replacing the TB. Now raise
2040 * the debug interrupt so that is will trigger after the
2041 * current instruction. */
2042 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2043 return;
2045 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2046 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2047 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2048 && (wp->flags & flags)) {
2049 if (flags == BP_MEM_READ) {
2050 wp->flags |= BP_WATCHPOINT_HIT_READ;
2051 } else {
2052 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2054 wp->hitaddr = vaddr;
2055 wp->hitattrs = attrs;
2056 if (!cpu->watchpoint_hit) {
2057 cpu->watchpoint_hit = wp;
2058 tb_check_watchpoint(cpu);
2059 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2060 cpu->exception_index = EXCP_DEBUG;
2061 cpu_loop_exit(cpu);
2062 } else {
2063 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2064 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2065 cpu_resume_from_signal(cpu, NULL);
2068 } else {
2069 wp->flags &= ~BP_WATCHPOINT_HIT;
2074 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2075 so these check for a hit then pass through to the normal out-of-line
2076 phys routines. */
2077 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2078 unsigned size, MemTxAttrs attrs)
2080 MemTxResult res;
2081 uint64_t data;
2083 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2084 switch (size) {
2085 case 1:
2086 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2087 break;
2088 case 2:
2089 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2090 break;
2091 case 4:
2092 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2093 break;
2094 default: abort();
2096 *pdata = data;
2097 return res;
2100 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2101 uint64_t val, unsigned size,
2102 MemTxAttrs attrs)
2104 MemTxResult res;
2106 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2107 switch (size) {
2108 case 1:
2109 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2110 break;
2111 case 2:
2112 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2113 break;
2114 case 4:
2115 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2116 break;
2117 default: abort();
2119 return res;
2122 static const MemoryRegionOps watch_mem_ops = {
2123 .read_with_attrs = watch_mem_read,
2124 .write_with_attrs = watch_mem_write,
2125 .endianness = DEVICE_NATIVE_ENDIAN,
2128 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2129 unsigned len, MemTxAttrs attrs)
2131 subpage_t *subpage = opaque;
2132 uint8_t buf[8];
2133 MemTxResult res;
2135 #if defined(DEBUG_SUBPAGE)
2136 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2137 subpage, len, addr);
2138 #endif
2139 res = address_space_read(subpage->as, addr + subpage->base,
2140 attrs, buf, len);
2141 if (res) {
2142 return res;
2144 switch (len) {
2145 case 1:
2146 *data = ldub_p(buf);
2147 return MEMTX_OK;
2148 case 2:
2149 *data = lduw_p(buf);
2150 return MEMTX_OK;
2151 case 4:
2152 *data = ldl_p(buf);
2153 return MEMTX_OK;
2154 case 8:
2155 *data = ldq_p(buf);
2156 return MEMTX_OK;
2157 default:
2158 abort();
2162 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2163 uint64_t value, unsigned len, MemTxAttrs attrs)
2165 subpage_t *subpage = opaque;
2166 uint8_t buf[8];
2168 #if defined(DEBUG_SUBPAGE)
2169 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2170 " value %"PRIx64"\n",
2171 __func__, subpage, len, addr, value);
2172 #endif
2173 switch (len) {
2174 case 1:
2175 stb_p(buf, value);
2176 break;
2177 case 2:
2178 stw_p(buf, value);
2179 break;
2180 case 4:
2181 stl_p(buf, value);
2182 break;
2183 case 8:
2184 stq_p(buf, value);
2185 break;
2186 default:
2187 abort();
2189 return address_space_write(subpage->as, addr + subpage->base,
2190 attrs, buf, len);
2193 static bool subpage_accepts(void *opaque, hwaddr addr,
2194 unsigned len, bool is_write)
2196 subpage_t *subpage = opaque;
2197 #if defined(DEBUG_SUBPAGE)
2198 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2199 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2200 #endif
2202 return address_space_access_valid(subpage->as, addr + subpage->base,
2203 len, is_write);
2206 static const MemoryRegionOps subpage_ops = {
2207 .read_with_attrs = subpage_read,
2208 .write_with_attrs = subpage_write,
2209 .impl.min_access_size = 1,
2210 .impl.max_access_size = 8,
2211 .valid.min_access_size = 1,
2212 .valid.max_access_size = 8,
2213 .valid.accepts = subpage_accepts,
2214 .endianness = DEVICE_NATIVE_ENDIAN,
2217 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2218 uint16_t section)
2220 int idx, eidx;
2222 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2223 return -1;
2224 idx = SUBPAGE_IDX(start);
2225 eidx = SUBPAGE_IDX(end);
2226 #if defined(DEBUG_SUBPAGE)
2227 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2228 __func__, mmio, start, end, idx, eidx, section);
2229 #endif
2230 for (; idx <= eidx; idx++) {
2231 mmio->sub_section[idx] = section;
2234 return 0;
2237 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2239 subpage_t *mmio;
2241 mmio = g_malloc0(sizeof(subpage_t));
2243 mmio->as = as;
2244 mmio->base = base;
2245 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2246 NULL, TARGET_PAGE_SIZE);
2247 mmio->iomem.subpage = true;
2248 #if defined(DEBUG_SUBPAGE)
2249 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2250 mmio, base, TARGET_PAGE_SIZE);
2251 #endif
2252 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2254 return mmio;
2257 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2258 MemoryRegion *mr)
2260 assert(as);
2261 MemoryRegionSection section = {
2262 .address_space = as,
2263 .mr = mr,
2264 .offset_within_address_space = 0,
2265 .offset_within_region = 0,
2266 .size = int128_2_64(),
2269 return phys_section_add(map, &section);
2272 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2274 CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2275 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2276 MemoryRegionSection *sections = d->map.sections;
2278 return sections[index & ~TARGET_PAGE_MASK].mr;
2281 static void io_mem_init(void)
2283 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2284 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2285 NULL, UINT64_MAX);
2286 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2287 NULL, UINT64_MAX);
2288 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2289 NULL, UINT64_MAX);
2292 static void mem_begin(MemoryListener *listener)
2294 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2295 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2296 uint16_t n;
2298 n = dummy_section(&d->map, as, &io_mem_unassigned);
2299 assert(n == PHYS_SECTION_UNASSIGNED);
2300 n = dummy_section(&d->map, as, &io_mem_notdirty);
2301 assert(n == PHYS_SECTION_NOTDIRTY);
2302 n = dummy_section(&d->map, as, &io_mem_rom);
2303 assert(n == PHYS_SECTION_ROM);
2304 n = dummy_section(&d->map, as, &io_mem_watch);
2305 assert(n == PHYS_SECTION_WATCH);
2307 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2308 d->as = as;
2309 as->next_dispatch = d;
2312 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2314 phys_sections_free(&d->map);
2315 g_free(d);
2318 static void mem_commit(MemoryListener *listener)
2320 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2321 AddressSpaceDispatch *cur = as->dispatch;
2322 AddressSpaceDispatch *next = as->next_dispatch;
2324 phys_page_compact_all(next, next->map.nodes_nb);
2326 atomic_rcu_set(&as->dispatch, next);
2327 if (cur) {
2328 call_rcu(cur, address_space_dispatch_free, rcu);
2332 static void tcg_commit(MemoryListener *listener)
2334 CPUAddressSpace *cpuas;
2335 AddressSpaceDispatch *d;
2337 /* since each CPU stores ram addresses in its TLB cache, we must
2338 reset the modified entries */
2339 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2340 cpu_reloading_memory_map();
2341 /* The CPU and TLB are protected by the iothread lock.
2342 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2343 * may have split the RCU critical section.
2345 d = atomic_rcu_read(&cpuas->as->dispatch);
2346 cpuas->memory_dispatch = d;
2347 tlb_flush(cpuas->cpu, 1);
2350 void address_space_init_dispatch(AddressSpace *as)
2352 as->dispatch = NULL;
2353 as->dispatch_listener = (MemoryListener) {
2354 .begin = mem_begin,
2355 .commit = mem_commit,
2356 .region_add = mem_add,
2357 .region_nop = mem_add,
2358 .priority = 0,
2360 memory_listener_register(&as->dispatch_listener, as);
2363 void address_space_unregister(AddressSpace *as)
2365 memory_listener_unregister(&as->dispatch_listener);
2368 void address_space_destroy_dispatch(AddressSpace *as)
2370 AddressSpaceDispatch *d = as->dispatch;
2372 atomic_rcu_set(&as->dispatch, NULL);
2373 if (d) {
2374 call_rcu(d, address_space_dispatch_free, rcu);
2378 static void memory_map_init(void)
2380 system_memory = g_malloc(sizeof(*system_memory));
2382 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2383 address_space_init(&address_space_memory, system_memory, "memory");
2385 system_io = g_malloc(sizeof(*system_io));
2386 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2387 65536);
2388 address_space_init(&address_space_io, system_io, "I/O");
2391 MemoryRegion *get_system_memory(void)
2393 return system_memory;
2396 MemoryRegion *get_system_io(void)
2398 return system_io;
2401 #endif /* !defined(CONFIG_USER_ONLY) */
2403 /* physical memory access (slow version, mainly for debug) */
2404 #if defined(CONFIG_USER_ONLY)
2405 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2406 uint8_t *buf, int len, int is_write)
2408 int l, flags;
2409 target_ulong page;
2410 void * p;
2412 while (len > 0) {
2413 page = addr & TARGET_PAGE_MASK;
2414 l = (page + TARGET_PAGE_SIZE) - addr;
2415 if (l > len)
2416 l = len;
2417 flags = page_get_flags(page);
2418 if (!(flags & PAGE_VALID))
2419 return -1;
2420 if (is_write) {
2421 if (!(flags & PAGE_WRITE))
2422 return -1;
2423 /* XXX: this code should not depend on lock_user */
2424 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2425 return -1;
2426 memcpy(p, buf, l);
2427 unlock_user(p, addr, l);
2428 } else {
2429 if (!(flags & PAGE_READ))
2430 return -1;
2431 /* XXX: this code should not depend on lock_user */
2432 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2433 return -1;
2434 memcpy(buf, p, l);
2435 unlock_user(p, addr, 0);
2437 len -= l;
2438 buf += l;
2439 addr += l;
2441 return 0;
2444 #else
2446 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2447 hwaddr length)
2449 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2450 /* No early return if dirty_log_mask is or becomes 0, because
2451 * cpu_physical_memory_set_dirty_range will still call
2452 * xen_modified_memory.
2454 if (dirty_log_mask) {
2455 dirty_log_mask =
2456 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2458 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2459 tb_invalidate_phys_range(addr, addr + length);
2460 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2462 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2465 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2467 unsigned access_size_max = mr->ops->valid.max_access_size;
2469 /* Regions are assumed to support 1-4 byte accesses unless
2470 otherwise specified. */
2471 if (access_size_max == 0) {
2472 access_size_max = 4;
2475 /* Bound the maximum access by the alignment of the address. */
2476 if (!mr->ops->impl.unaligned) {
2477 unsigned align_size_max = addr & -addr;
2478 if (align_size_max != 0 && align_size_max < access_size_max) {
2479 access_size_max = align_size_max;
2483 /* Don't attempt accesses larger than the maximum. */
2484 if (l > access_size_max) {
2485 l = access_size_max;
2487 l = pow2floor(l);
2489 return l;
2492 static bool prepare_mmio_access(MemoryRegion *mr)
2494 bool unlocked = !qemu_mutex_iothread_locked();
2495 bool release_lock = false;
2497 if (unlocked && mr->global_locking) {
2498 qemu_mutex_lock_iothread();
2499 unlocked = false;
2500 release_lock = true;
2502 if (mr->flush_coalesced_mmio) {
2503 if (unlocked) {
2504 qemu_mutex_lock_iothread();
2506 qemu_flush_coalesced_mmio_buffer();
2507 if (unlocked) {
2508 qemu_mutex_unlock_iothread();
2512 return release_lock;
2515 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2516 uint8_t *buf, int len, bool is_write)
2518 hwaddr l;
2519 uint8_t *ptr;
2520 uint64_t val;
2521 hwaddr addr1;
2522 MemoryRegion *mr;
2523 MemTxResult result = MEMTX_OK;
2524 bool release_lock = false;
2526 rcu_read_lock();
2527 while (len > 0) {
2528 l = len;
2529 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2531 if (is_write) {
2532 if (!memory_access_is_direct(mr, is_write)) {
2533 release_lock |= prepare_mmio_access(mr);
2534 l = memory_access_size(mr, l, addr1);
2535 /* XXX: could force current_cpu to NULL to avoid
2536 potential bugs */
2537 switch (l) {
2538 case 8:
2539 /* 64 bit write access */
2540 val = ldq_p(buf);
2541 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2542 attrs);
2543 break;
2544 case 4:
2545 /* 32 bit write access */
2546 val = ldl_p(buf);
2547 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2548 attrs);
2549 break;
2550 case 2:
2551 /* 16 bit write access */
2552 val = lduw_p(buf);
2553 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2554 attrs);
2555 break;
2556 case 1:
2557 /* 8 bit write access */
2558 val = ldub_p(buf);
2559 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2560 attrs);
2561 break;
2562 default:
2563 abort();
2565 } else {
2566 addr1 += memory_region_get_ram_addr(mr);
2567 /* RAM case */
2568 ptr = qemu_get_ram_ptr(addr1);
2569 memcpy(ptr, buf, l);
2570 invalidate_and_set_dirty(mr, addr1, l);
2572 } else {
2573 if (!memory_access_is_direct(mr, is_write)) {
2574 /* I/O case */
2575 release_lock |= prepare_mmio_access(mr);
2576 l = memory_access_size(mr, l, addr1);
2577 switch (l) {
2578 case 8:
2579 /* 64 bit read access */
2580 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2581 attrs);
2582 stq_p(buf, val);
2583 break;
2584 case 4:
2585 /* 32 bit read access */
2586 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2587 attrs);
2588 stl_p(buf, val);
2589 break;
2590 case 2:
2591 /* 16 bit read access */
2592 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2593 attrs);
2594 stw_p(buf, val);
2595 break;
2596 case 1:
2597 /* 8 bit read access */
2598 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2599 attrs);
2600 stb_p(buf, val);
2601 break;
2602 default:
2603 abort();
2605 } else {
2606 /* RAM case */
2607 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2608 memcpy(buf, ptr, l);
2612 if (release_lock) {
2613 qemu_mutex_unlock_iothread();
2614 release_lock = false;
2617 len -= l;
2618 buf += l;
2619 addr += l;
2621 rcu_read_unlock();
2623 return result;
2626 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2627 const uint8_t *buf, int len)
2629 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2632 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2633 uint8_t *buf, int len)
2635 return address_space_rw(as, addr, attrs, buf, len, false);
2639 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2640 int len, int is_write)
2642 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2643 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the caller's buffer into the region */
    FLUSH_CACHE = 1,    /* flush the host icache for the range */
};
2651 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2652 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2654 hwaddr l;
2655 uint8_t *ptr;
2656 hwaddr addr1;
2657 MemoryRegion *mr;
2659 rcu_read_lock();
2660 while (len > 0) {
2661 l = len;
2662 mr = address_space_translate(as, addr, &addr1, &l, true);
2664 if (!(memory_region_is_ram(mr) ||
2665 memory_region_is_romd(mr))) {
2666 l = memory_access_size(mr, l, addr1);
2667 } else {
2668 addr1 += memory_region_get_ram_addr(mr);
2669 /* ROM/RAM case */
2670 ptr = qemu_get_ram_ptr(addr1);
2671 switch (type) {
2672 case WRITE_DATA:
2673 memcpy(ptr, buf, l);
2674 invalidate_and_set_dirty(mr, addr1, l);
2675 break;
2676 case FLUSH_CACHE:
2677 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2678 break;
2681 len -= l;
2682 buf += l;
2683 addr += l;
2685 rcu_read_unlock();
2688 /* used for ROM loading : can write in RAM and ROM */
2689 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2690 const uint8_t *buf, int len)
2692 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2695 void cpu_flush_icache_range(hwaddr start, int len)
2698 * This function should do the same thing as an icache flush that was
2699 * triggered from within the guest. For TCG we are always cache coherent,
2700 * so there is no need to flush anything. For KVM / Xen we need to flush
2701 * the host's instruction cache at least.
2703 if (tcg_enabled()) {
2704 return;
2707 cpu_physical_memory_write_rom_internal(&address_space_memory,
2708 start, NULL, len, FLUSH_CACHE);
2711 typedef struct {
2712 MemoryRegion *mr;
2713 void *buffer;
2714 hwaddr addr;
2715 hwaddr len;
2716 bool in_use;
2717 } BounceBuffer;
2719 static BounceBuffer bounce;
2721 typedef struct MapClient {
2722 QEMUBH *bh;
2723 QLIST_ENTRY(MapClient) link;
2724 } MapClient;
2726 QemuMutex map_client_list_lock;
2727 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2728 = QLIST_HEAD_INITIALIZER(map_client_list);
2730 static void cpu_unregister_map_client_do(MapClient *client)
2732 QLIST_REMOVE(client, link);
2733 g_free(client);
2736 static void cpu_notify_map_clients_locked(void)
2738 MapClient *client;
2740 while (!QLIST_EMPTY(&map_client_list)) {
2741 client = QLIST_FIRST(&map_client_list);
2742 qemu_bh_schedule(client->bh);
2743 cpu_unregister_map_client_do(client);
2747 void cpu_register_map_client(QEMUBH *bh)
2749 MapClient *client = g_malloc(sizeof(*client));
2751 qemu_mutex_lock(&map_client_list_lock);
2752 client->bh = bh;
2753 QLIST_INSERT_HEAD(&map_client_list, client, link);
2754 if (!atomic_read(&bounce.in_use)) {
2755 cpu_notify_map_clients_locked();
2757 qemu_mutex_unlock(&map_client_list_lock);
2760 void cpu_exec_init_all(void)
2762 qemu_mutex_init(&ram_list.mutex);
2763 io_mem_init();
2764 memory_map_init();
2765 qemu_mutex_init(&map_client_list_lock);
2768 void cpu_unregister_map_client(QEMUBH *bh)
2770 MapClient *client;
2772 qemu_mutex_lock(&map_client_list_lock);
2773 QLIST_FOREACH(client, &map_client_list, link) {
2774 if (client->bh == bh) {
2775 cpu_unregister_map_client_do(client);
2776 break;
2779 qemu_mutex_unlock(&map_client_list_lock);
2782 static void cpu_notify_map_clients(void)
2784 qemu_mutex_lock(&map_client_list_lock);
2785 cpu_notify_map_clients_locked();
2786 qemu_mutex_unlock(&map_client_list_lock);
2789 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2791 MemoryRegion *mr;
2792 hwaddr l, xlat;
2794 rcu_read_lock();
2795 while (len > 0) {
2796 l = len;
2797 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2798 if (!memory_access_is_direct(mr, is_write)) {
2799 l = memory_access_size(mr, l, addr);
2800 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2801 return false;
2805 len -= l;
2806 addr += l;
2808 rcu_read_unlock();
2809 return true;
2812 /* Map a physical memory region into a host virtual address.
2813 * May map a subset of the requested range, given by and returned in *plen.
2814 * May return NULL if resources needed to perform the mapping are exhausted.
2815 * Use only for reads OR writes - not for read-modify-write operations.
2816 * Use cpu_register_map_client() to know when retrying the map operation is
2817 * likely to succeed.
2819 void *address_space_map(AddressSpace *as,
2820 hwaddr addr,
2821 hwaddr *plen,
2822 bool is_write)
2824 hwaddr len = *plen;
2825 hwaddr done = 0;
2826 hwaddr l, xlat, base;
2827 MemoryRegion *mr, *this_mr;
2828 ram_addr_t raddr;
2830 if (len == 0) {
2831 return NULL;
2834 l = len;
2835 rcu_read_lock();
2836 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2838 if (!memory_access_is_direct(mr, is_write)) {
2839 if (atomic_xchg(&bounce.in_use, true)) {
2840 rcu_read_unlock();
2841 return NULL;
2843 /* Avoid unbounded allocations */
2844 l = MIN(l, TARGET_PAGE_SIZE);
2845 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2846 bounce.addr = addr;
2847 bounce.len = l;
2849 memory_region_ref(mr);
2850 bounce.mr = mr;
2851 if (!is_write) {
2852 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2853 bounce.buffer, l);
2856 rcu_read_unlock();
2857 *plen = l;
2858 return bounce.buffer;
2861 base = xlat;
2862 raddr = memory_region_get_ram_addr(mr);
2864 for (;;) {
2865 len -= l;
2866 addr += l;
2867 done += l;
2868 if (len == 0) {
2869 break;
2872 l = len;
2873 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2874 if (this_mr != mr || xlat != base + done) {
2875 break;
2879 memory_region_ref(mr);
2880 rcu_read_unlock();
2881 *plen = done;
2882 return qemu_ram_ptr_length(raddr + base, plen);
2885 /* Unmaps a memory region previously mapped by address_space_map().
2886 * Will also mark the memory as dirty if is_write == 1. access_len gives
2887 * the amount of memory that was actually read or written by the caller.
2889 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2890 int is_write, hwaddr access_len)
2892 if (buffer != bounce.buffer) {
2893 MemoryRegion *mr;
2894 ram_addr_t addr1;
2896 mr = qemu_ram_addr_from_host(buffer, &addr1);
2897 assert(mr != NULL);
2898 if (is_write) {
2899 invalidate_and_set_dirty(mr, addr1, access_len);
2901 if (xen_enabled()) {
2902 xen_invalidate_map_cache_entry(buffer);
2904 memory_region_unref(mr);
2905 return;
2907 if (is_write) {
2908 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2909 bounce.buffer, access_len);
2911 qemu_vfree(bounce.buffer);
2912 bounce.buffer = NULL;
2913 memory_region_unref(bounce.mr);
2914 atomic_mb_set(&bounce.in_use, false);
2915 cpu_notify_map_clients();
2918 void *cpu_physical_memory_map(hwaddr addr,
2919 hwaddr *plen,
2920 int is_write)
2922 return address_space_map(&address_space_memory, addr, plen, is_write);
2925 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2926 int is_write, hwaddr access_len)
2928 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2931 /* warning: addr must be aligned */
2932 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2933 MemTxAttrs attrs,
2934 MemTxResult *result,
2935 enum device_endian endian)
2937 uint8_t *ptr;
2938 uint64_t val;
2939 MemoryRegion *mr;
2940 hwaddr l = 4;
2941 hwaddr addr1;
2942 MemTxResult r;
2943 bool release_lock = false;
2945 rcu_read_lock();
2946 mr = address_space_translate(as, addr, &addr1, &l, false);
2947 if (l < 4 || !memory_access_is_direct(mr, false)) {
2948 release_lock |= prepare_mmio_access(mr);
2950 /* I/O case */
2951 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2952 #if defined(TARGET_WORDS_BIGENDIAN)
2953 if (endian == DEVICE_LITTLE_ENDIAN) {
2954 val = bswap32(val);
2956 #else
2957 if (endian == DEVICE_BIG_ENDIAN) {
2958 val = bswap32(val);
2960 #endif
2961 } else {
2962 /* RAM case */
2963 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2964 & TARGET_PAGE_MASK)
2965 + addr1);
2966 switch (endian) {
2967 case DEVICE_LITTLE_ENDIAN:
2968 val = ldl_le_p(ptr);
2969 break;
2970 case DEVICE_BIG_ENDIAN:
2971 val = ldl_be_p(ptr);
2972 break;
2973 default:
2974 val = ldl_p(ptr);
2975 break;
2977 r = MEMTX_OK;
2979 if (result) {
2980 *result = r;
2982 if (release_lock) {
2983 qemu_mutex_unlock_iothread();
2985 rcu_read_unlock();
2986 return val;
2989 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2990 MemTxAttrs attrs, MemTxResult *result)
2992 return address_space_ldl_internal(as, addr, attrs, result,
2993 DEVICE_NATIVE_ENDIAN);
2996 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2997 MemTxAttrs attrs, MemTxResult *result)
2999 return address_space_ldl_internal(as, addr, attrs, result,
3000 DEVICE_LITTLE_ENDIAN);
3003 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3004 MemTxAttrs attrs, MemTxResult *result)
3006 return address_space_ldl_internal(as, addr, attrs, result,
3007 DEVICE_BIG_ENDIAN);
3010 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3012 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3015 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3017 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3020 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3022 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3025 /* warning: addr must be aligned */
3026 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3027 MemTxAttrs attrs,
3028 MemTxResult *result,
3029 enum device_endian endian)
3031 uint8_t *ptr;
3032 uint64_t val;
3033 MemoryRegion *mr;
3034 hwaddr l = 8;
3035 hwaddr addr1;
3036 MemTxResult r;
3037 bool release_lock = false;
3039 rcu_read_lock();
3040 mr = address_space_translate(as, addr, &addr1, &l,
3041 false);
3042 if (l < 8 || !memory_access_is_direct(mr, false)) {
3043 release_lock |= prepare_mmio_access(mr);
3045 /* I/O case */
3046 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3047 #if defined(TARGET_WORDS_BIGENDIAN)
3048 if (endian == DEVICE_LITTLE_ENDIAN) {
3049 val = bswap64(val);
3051 #else
3052 if (endian == DEVICE_BIG_ENDIAN) {
3053 val = bswap64(val);
3055 #endif
3056 } else {
3057 /* RAM case */
3058 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3059 & TARGET_PAGE_MASK)
3060 + addr1);
3061 switch (endian) {
3062 case DEVICE_LITTLE_ENDIAN:
3063 val = ldq_le_p(ptr);
3064 break;
3065 case DEVICE_BIG_ENDIAN:
3066 val = ldq_be_p(ptr);
3067 break;
3068 default:
3069 val = ldq_p(ptr);
3070 break;
3072 r = MEMTX_OK;
3074 if (result) {
3075 *result = r;
3077 if (release_lock) {
3078 qemu_mutex_unlock_iothread();
3080 rcu_read_unlock();
3081 return val;
3084 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3085 MemTxAttrs attrs, MemTxResult *result)
3087 return address_space_ldq_internal(as, addr, attrs, result,
3088 DEVICE_NATIVE_ENDIAN);
3091 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3092 MemTxAttrs attrs, MemTxResult *result)
3094 return address_space_ldq_internal(as, addr, attrs, result,
3095 DEVICE_LITTLE_ENDIAN);
3098 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3099 MemTxAttrs attrs, MemTxResult *result)
3101 return address_space_ldq_internal(as, addr, attrs, result,
3102 DEVICE_BIG_ENDIAN);
3105 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3107 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3110 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3112 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3115 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3117 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3120 /* XXX: optimize */
3121 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3122 MemTxAttrs attrs, MemTxResult *result)
3124 uint8_t val;
3125 MemTxResult r;
3127 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3128 if (result) {
3129 *result = r;
3131 return val;
3134 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3136 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3139 /* warning: addr must be aligned */
3140 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3141 hwaddr addr,
3142 MemTxAttrs attrs,
3143 MemTxResult *result,
3144 enum device_endian endian)
3146 uint8_t *ptr;
3147 uint64_t val;
3148 MemoryRegion *mr;
3149 hwaddr l = 2;
3150 hwaddr addr1;
3151 MemTxResult r;
3152 bool release_lock = false;
3154 rcu_read_lock();
3155 mr = address_space_translate(as, addr, &addr1, &l,
3156 false);
3157 if (l < 2 || !memory_access_is_direct(mr, false)) {
3158 release_lock |= prepare_mmio_access(mr);
3160 /* I/O case */
3161 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3162 #if defined(TARGET_WORDS_BIGENDIAN)
3163 if (endian == DEVICE_LITTLE_ENDIAN) {
3164 val = bswap16(val);
3166 #else
3167 if (endian == DEVICE_BIG_ENDIAN) {
3168 val = bswap16(val);
3170 #endif
3171 } else {
3172 /* RAM case */
3173 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3174 & TARGET_PAGE_MASK)
3175 + addr1);
3176 switch (endian) {
3177 case DEVICE_LITTLE_ENDIAN:
3178 val = lduw_le_p(ptr);
3179 break;
3180 case DEVICE_BIG_ENDIAN:
3181 val = lduw_be_p(ptr);
3182 break;
3183 default:
3184 val = lduw_p(ptr);
3185 break;
3187 r = MEMTX_OK;
3189 if (result) {
3190 *result = r;
3192 if (release_lock) {
3193 qemu_mutex_unlock_iothread();
3195 rcu_read_unlock();
3196 return val;
3199 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3200 MemTxAttrs attrs, MemTxResult *result)
3202 return address_space_lduw_internal(as, addr, attrs, result,
3203 DEVICE_NATIVE_ENDIAN);
3206 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3207 MemTxAttrs attrs, MemTxResult *result)
3209 return address_space_lduw_internal(as, addr, attrs, result,
3210 DEVICE_LITTLE_ENDIAN);
3213 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3214 MemTxAttrs attrs, MemTxResult *result)
3216 return address_space_lduw_internal(as, addr, attrs, result,
3217 DEVICE_BIG_ENDIAN);
3220 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3222 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3225 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3227 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3230 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3232 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3235 /* warning: addr must be aligned. The ram page is not masked as dirty
3236 and the code inside is not invalidated. It is useful if the dirty
3237 bits are used to track modified PTEs */
3238 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3239 MemTxAttrs attrs, MemTxResult *result)
3241 uint8_t *ptr;
3242 MemoryRegion *mr;
3243 hwaddr l = 4;
3244 hwaddr addr1;
3245 MemTxResult r;
3246 uint8_t dirty_log_mask;
3247 bool release_lock = false;
3249 rcu_read_lock();
3250 mr = address_space_translate(as, addr, &addr1, &l,
3251 true);
3252 if (l < 4 || !memory_access_is_direct(mr, true)) {
3253 release_lock |= prepare_mmio_access(mr);
3255 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3256 } else {
3257 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3258 ptr = qemu_get_ram_ptr(addr1);
3259 stl_p(ptr, val);
3261 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3262 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3263 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3264 r = MEMTX_OK;
3266 if (result) {
3267 *result = r;
3269 if (release_lock) {
3270 qemu_mutex_unlock_iothread();
3272 rcu_read_unlock();
3275 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3277 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3280 /* warning: addr must be aligned */
3281 static inline void address_space_stl_internal(AddressSpace *as,
3282 hwaddr addr, uint32_t val,
3283 MemTxAttrs attrs,
3284 MemTxResult *result,
3285 enum device_endian endian)
3287 uint8_t *ptr;
3288 MemoryRegion *mr;
3289 hwaddr l = 4;
3290 hwaddr addr1;
3291 MemTxResult r;
3292 bool release_lock = false;
3294 rcu_read_lock();
3295 mr = address_space_translate(as, addr, &addr1, &l,
3296 true);
3297 if (l < 4 || !memory_access_is_direct(mr, true)) {
3298 release_lock |= prepare_mmio_access(mr);
3300 #if defined(TARGET_WORDS_BIGENDIAN)
3301 if (endian == DEVICE_LITTLE_ENDIAN) {
3302 val = bswap32(val);
3304 #else
3305 if (endian == DEVICE_BIG_ENDIAN) {
3306 val = bswap32(val);
3308 #endif
3309 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3310 } else {
3311 /* RAM case */
3312 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3313 ptr = qemu_get_ram_ptr(addr1);
3314 switch (endian) {
3315 case DEVICE_LITTLE_ENDIAN:
3316 stl_le_p(ptr, val);
3317 break;
3318 case DEVICE_BIG_ENDIAN:
3319 stl_be_p(ptr, val);
3320 break;
3321 default:
3322 stl_p(ptr, val);
3323 break;
3325 invalidate_and_set_dirty(mr, addr1, 4);
3326 r = MEMTX_OK;
3328 if (result) {
3329 *result = r;
3331 if (release_lock) {
3332 qemu_mutex_unlock_iothread();
3334 rcu_read_unlock();
3337 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3338 MemTxAttrs attrs, MemTxResult *result)
3340 address_space_stl_internal(as, addr, val, attrs, result,
3341 DEVICE_NATIVE_ENDIAN);
3344 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3345 MemTxAttrs attrs, MemTxResult *result)
3347 address_space_stl_internal(as, addr, val, attrs, result,
3348 DEVICE_LITTLE_ENDIAN);
3351 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3352 MemTxAttrs attrs, MemTxResult *result)
3354 address_space_stl_internal(as, addr, val, attrs, result,
3355 DEVICE_BIG_ENDIAN);
3358 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3360 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3363 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3365 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3368 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3370 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3373 /* XXX: optimize */
3374 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3375 MemTxAttrs attrs, MemTxResult *result)
3377 uint8_t v = val;
3378 MemTxResult r;
3380 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3381 if (result) {
3382 *result = r;
3386 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3388 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3391 /* warning: addr must be aligned */
3392 static inline void address_space_stw_internal(AddressSpace *as,
3393 hwaddr addr, uint32_t val,
3394 MemTxAttrs attrs,
3395 MemTxResult *result,
3396 enum device_endian endian)
3398 uint8_t *ptr;
3399 MemoryRegion *mr;
3400 hwaddr l = 2;
3401 hwaddr addr1;
3402 MemTxResult r;
3403 bool release_lock = false;
3405 rcu_read_lock();
3406 mr = address_space_translate(as, addr, &addr1, &l, true);
3407 if (l < 2 || !memory_access_is_direct(mr, true)) {
3408 release_lock |= prepare_mmio_access(mr);
3410 #if defined(TARGET_WORDS_BIGENDIAN)
3411 if (endian == DEVICE_LITTLE_ENDIAN) {
3412 val = bswap16(val);
3414 #else
3415 if (endian == DEVICE_BIG_ENDIAN) {
3416 val = bswap16(val);
3418 #endif
3419 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3420 } else {
3421 /* RAM case */
3422 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3423 ptr = qemu_get_ram_ptr(addr1);
3424 switch (endian) {
3425 case DEVICE_LITTLE_ENDIAN:
3426 stw_le_p(ptr, val);
3427 break;
3428 case DEVICE_BIG_ENDIAN:
3429 stw_be_p(ptr, val);
3430 break;
3431 default:
3432 stw_p(ptr, val);
3433 break;
3435 invalidate_and_set_dirty(mr, addr1, 2);
3436 r = MEMTX_OK;
3438 if (result) {
3439 *result = r;
3441 if (release_lock) {
3442 qemu_mutex_unlock_iothread();
3444 rcu_read_unlock();
3447 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3448 MemTxAttrs attrs, MemTxResult *result)
3450 address_space_stw_internal(as, addr, val, attrs, result,
3451 DEVICE_NATIVE_ENDIAN);
3454 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3455 MemTxAttrs attrs, MemTxResult *result)
3457 address_space_stw_internal(as, addr, val, attrs, result,
3458 DEVICE_LITTLE_ENDIAN);
3461 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3462 MemTxAttrs attrs, MemTxResult *result)
3464 address_space_stw_internal(as, addr, val, attrs, result,
3465 DEVICE_BIG_ENDIAN);
3468 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3470 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3473 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3475 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3478 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3480 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3483 /* XXX: optimize */
3484 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3485 MemTxAttrs attrs, MemTxResult *result)
3487 MemTxResult r;
3488 val = tswap64(val);
3489 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3490 if (result) {
3491 *result = r;
3495 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3496 MemTxAttrs attrs, MemTxResult *result)
3498 MemTxResult r;
3499 val = cpu_to_le64(val);
3500 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3501 if (result) {
3502 *result = r;
3505 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3506 MemTxAttrs attrs, MemTxResult *result)
3508 MemTxResult r;
3509 val = cpu_to_be64(val);
3510 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3511 if (result) {
3512 *result = r;
3516 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3518 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3521 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3523 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3526 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3528 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3531 /* virtual memory access for debug (includes writing to ROM) */
3532 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3533 uint8_t *buf, int len, int is_write)
3535 int l;
3536 hwaddr phys_addr;
3537 target_ulong page;
3539 while (len > 0) {
3540 page = addr & TARGET_PAGE_MASK;
3541 phys_addr = cpu_get_phys_page_debug(cpu, page);
3542 /* if no physical page mapped, return an error */
3543 if (phys_addr == -1)
3544 return -1;
3545 l = (page + TARGET_PAGE_SIZE) - addr;
3546 if (l > len)
3547 l = len;
3548 phys_addr += (addr & ~TARGET_PAGE_MASK);
3549 if (is_write) {
3550 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3551 } else {
3552 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3553 buf, l, 0);
3555 len -= l;
3556 buf += l;
3557 addr += l;
3559 return 0;
3563 * Allows code that needs to deal with migration bitmaps etc to still be built
3564 * target independent.
3566 size_t qemu_target_page_bits(void)
3568 return TARGET_PAGE_BITS;
3571 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3587 #ifndef CONFIG_USER_ONLY
3588 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3590 MemoryRegion*mr;
3591 hwaddr l = 1;
3592 bool res;
3594 rcu_read_lock();
3595 mr = address_space_translate(&address_space_memory,
3596 phys_addr, &phys_addr, &l, false);
3598 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3599 rcu_read_unlock();
3600 return res;
3603 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3605 RAMBlock *block;
3606 int ret = 0;
3608 rcu_read_lock();
3609 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3610 ret = func(block->idstr, block->host, block->offset,
3611 block->used_length, opaque);
3612 if (ret) {
3613 break;
3616 rcu_read_unlock();
3617 return ret;
3619 #endif