savevm: fail if migration blockers are present
[qemu.git] / exec.c
blob fc7526666f05d077c8d1a60fcbd6a1208990e980
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #include <sys/mman.h>
23 #endif
25 #include "qemu/cutils.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #include "exec/memory.h"
40 #include "sysemu/dma.h"
41 #include "exec/address-spaces.h"
42 #if defined(CONFIG_USER_ONLY)
43 #include <qemu.h>
44 #else /* !CONFIG_USER_ONLY */
45 #include "sysemu/xen-mapcache.h"
46 #include "trace.h"
47 #endif
48 #include "exec/cpu-all.h"
49 #include "qemu/rcu_queue.h"
50 #include "qemu/main-loop.h"
51 #include "translate-all.h"
52 #include "sysemu/replay.h"
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
56 #include "exec/log.h"
58 #include "qemu/range.h"
59 #ifndef _WIN32
60 #include "qemu/mmap-alloc.h"
61 #endif
63 //#define DEBUG_SUBPAGE
65 #if !defined(CONFIG_USER_ONLY)
66 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
67 * are protected by the ramlist lock.
69 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
71 static MemoryRegion *system_memory;
72 static MemoryRegion *system_io;
74 AddressSpace address_space_io;
75 AddressSpace address_space_memory;
77 MemoryRegion io_mem_rom, io_mem_notdirty;
78 static MemoryRegion io_mem_unassigned;
80 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
81 #define RAM_PREALLOC (1 << 0)
83 /* RAM is mmap-ed with MAP_SHARED */
84 #define RAM_SHARED (1 << 1)
86 /* Only a portion of RAM (used_length) is actually used, and migrated.
87 * This used_length size can change across reboots.
89 #define RAM_RESIZEABLE (1 << 2)
91 #endif
93 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
94 /* current CPU in the current thread. It is only valid inside
95 cpu_exec() */
96 __thread CPUState *current_cpu;
97 /* 0 = Do not count executed instructions.
98 1 = Precise instruction counting.
99 2 = Adaptive rate instruction counting. */
100 int use_icount;
102 #if !defined(CONFIG_USER_ONLY)
104 typedef struct PhysPageEntry PhysPageEntry;
106 struct PhysPageEntry {
107 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
108 uint32_t skip : 6;
109 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
110 uint32_t ptr : 26;
113 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
115 /* Size of the L2 (and L3, etc) page tables. */
116 #define ADDR_SPACE_BITS 64
118 #define P_L2_BITS 9
119 #define P_L2_SIZE (1 << P_L2_BITS)
121 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
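/*
 * Worked example (added comment, not in the original source): assuming
 * TARGET_PAGE_BITS == 12, i.e. 4 KiB target pages, P_L2_LEVELS evaluates to
 * (64 - 12 - 1) / 9 + 1 = 6, so six 9-bit levels are enough to decode the
 * 52-bit page frame number of the 64-bit address space.
 */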
123 typedef PhysPageEntry Node[P_L2_SIZE];
125 typedef struct PhysPageMap {
126 struct rcu_head rcu;
128 unsigned sections_nb;
129 unsigned sections_nb_alloc;
130 unsigned nodes_nb;
131 unsigned nodes_nb_alloc;
132 Node *nodes;
133 MemoryRegionSection *sections;
134 } PhysPageMap;
136 struct AddressSpaceDispatch {
137 struct rcu_head rcu;
139 MemoryRegionSection *mru_section;
140 /* This is a multi-level map on the physical address space.
141 * The bottom level has pointers to MemoryRegionSections.
143 PhysPageEntry phys_map;
144 PhysPageMap map;
145 AddressSpace *as;
148 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
149 typedef struct subpage_t {
150 MemoryRegion iomem;
151 AddressSpace *as;
152 hwaddr base;
153 uint16_t sub_section[TARGET_PAGE_SIZE];
154 } subpage_t;
156 #define PHYS_SECTION_UNASSIGNED 0
157 #define PHYS_SECTION_NOTDIRTY 1
158 #define PHYS_SECTION_ROM 2
159 #define PHYS_SECTION_WATCH 3
161 static void io_mem_init(void);
162 static void memory_map_init(void);
163 static void tcg_commit(MemoryListener *listener);
165 static MemoryRegion io_mem_watch;
168 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
169 * @cpu: the CPU whose AddressSpace this is
170 * @as: the AddressSpace itself
171 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
172 * @tcg_as_listener: listener for tracking changes to the AddressSpace
174 struct CPUAddressSpace {
175 CPUState *cpu;
176 AddressSpace *as;
177 struct AddressSpaceDispatch *memory_dispatch;
178 MemoryListener tcg_as_listener;
181 #endif
183 #if !defined(CONFIG_USER_ONLY)
185 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
187 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
188 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
189 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
190 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
194 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
196 unsigned i;
197 uint32_t ret;
198 PhysPageEntry e;
199 PhysPageEntry *p;
201 ret = map->nodes_nb++;
202 p = map->nodes[ret];
203 assert(ret != PHYS_MAP_NODE_NIL);
204 assert(ret != map->nodes_nb_alloc);
206 e.skip = leaf ? 0 : 1;
207 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
208 for (i = 0; i < P_L2_SIZE; ++i) {
209 memcpy(&p[i], &e, sizeof(e));
211 return ret;
214 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
215 hwaddr *index, hwaddr *nb, uint16_t leaf,
216 int level)
218 PhysPageEntry *p;
219 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
221 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
222 lp->ptr = phys_map_node_alloc(map, level == 0);
224 p = map->nodes[lp->ptr];
225 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
227 while (*nb && lp < &p[P_L2_SIZE]) {
228 if ((*index & (step - 1)) == 0 && *nb >= step) {
229 lp->skip = 0;
230 lp->ptr = leaf;
231 *index += step;
232 *nb -= step;
233 } else {
234 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
236 ++lp;
240 static void phys_page_set(AddressSpaceDispatch *d,
241 hwaddr index, hwaddr nb,
242 uint16_t leaf)
244 /* Wildly overreserve - it doesn't matter much. */
245 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
247 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
250 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
251 * and update our entry so we can skip it and go directly to the destination.
253 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
255 unsigned valid_ptr = P_L2_SIZE;
256 int valid = 0;
257 PhysPageEntry *p;
258 int i;
260 if (lp->ptr == PHYS_MAP_NODE_NIL) {
261 return;
264 p = nodes[lp->ptr];
265 for (i = 0; i < P_L2_SIZE; i++) {
266 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
267 continue;
270 valid_ptr = i;
271 valid++;
272 if (p[i].skip) {
273 phys_page_compact(&p[i], nodes, compacted);
277 /* We can only compress if there's only one child. */
278 if (valid != 1) {
279 return;
282 assert(valid_ptr < P_L2_SIZE);
284 /* Don't compress if it won't fit in the # of bits we have. */
285 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
286 return;
289 lp->ptr = p[valid_ptr].ptr;
290 if (!p[valid_ptr].skip) {
291 /* If our only child is a leaf, make this a leaf. */
292 /* By design, we should have made this node a leaf to begin with so we
293 * should never reach here.
294 * But since it's so simple to handle this, let's do it just in case we
295 * change this rule.
297 lp->skip = 0;
298 } else {
299 lp->skip += p[valid_ptr].skip;
303 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
305 DECLARE_BITMAP(compacted, nodes_nb);
307 if (d->phys_map.skip) {
308 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
312 static inline bool section_covers_addr(const MemoryRegionSection *section,
313 hwaddr addr)
315 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
316 * the section must cover the entire address space.
318 return section->size.hi ||
319 range_covers_byte(section->offset_within_address_space,
320 section->size.lo, addr);
323 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
324 Node *nodes, MemoryRegionSection *sections)
326 PhysPageEntry *p;
327 hwaddr index = addr >> TARGET_PAGE_BITS;
328 int i;
330 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
331 if (lp.ptr == PHYS_MAP_NODE_NIL) {
332 return &sections[PHYS_SECTION_UNASSIGNED];
334 p = nodes[lp.ptr];
335 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
338 if (section_covers_addr(&sections[lp.ptr], addr)) {
339 return &sections[lp.ptr];
340 } else {
341 return &sections[PHYS_SECTION_UNASSIGNED];
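/*
 * Added note: each iteration of the walk above consumes lp.skip levels,
 * indexes the node named by lp.ptr with the next P_L2_BITS of the page
 * number, and stops once lp.skip == 0, i.e. once lp.ptr refers to a
 * MemoryRegionSection rather than to another node.
 */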
345 bool memory_region_is_unassigned(MemoryRegion *mr)
347 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
348 && mr != &io_mem_watch;
351 /* Called from RCU critical section */
352 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
353 hwaddr addr,
354 bool resolve_subpage)
356 MemoryRegionSection *section = atomic_read(&d->mru_section);
357 subpage_t *subpage;
358 bool update;
360 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
361 section_covers_addr(section, addr)) {
362 update = false;
363 } else {
364 section = phys_page_find(d->phys_map, addr, d->map.nodes,
365 d->map.sections);
366 update = true;
368 if (resolve_subpage && section->mr->subpage) {
369 subpage = container_of(section->mr, subpage_t, iomem);
370 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
372 if (update) {
373 atomic_set(&d->mru_section, section);
375 return section;
378 /* Called from RCU critical section */
379 static MemoryRegionSection *
380 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
381 hwaddr *plen, bool resolve_subpage)
383 MemoryRegionSection *section;
384 MemoryRegion *mr;
385 Int128 diff;
387 section = address_space_lookup_region(d, addr, resolve_subpage);
388 /* Compute offset within MemoryRegionSection */
389 addr -= section->offset_within_address_space;
391 /* Compute offset within MemoryRegion */
392 *xlat = addr + section->offset_within_region;
394 mr = section->mr;
396 /* MMIO registers can be expected to perform full-width accesses based only
397 * on their address, without considering adjacent registers that could
398 * decode to completely different MemoryRegions. When such registers
399 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
400 * regions overlap wildly. For this reason we cannot clamp the accesses
401 * here.
403 * If the length is small (as is the case for address_space_ldl/stl),
404 * everything works fine. If the incoming length is large, however,
405 * the caller really has to do the clamping through memory_access_size.
407 if (memory_region_is_ram(mr)) {
408 diff = int128_sub(section->size, int128_make64(addr));
409 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
411 return section;
414 /* Called from RCU critical section */
415 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
416 hwaddr *xlat, hwaddr *plen,
417 bool is_write)
419 IOMMUTLBEntry iotlb;
420 MemoryRegionSection *section;
421 MemoryRegion *mr;
423 for (;;) {
424 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
425 section = address_space_translate_internal(d, addr, &addr, plen, true);
426 mr = section->mr;
428 if (!mr->iommu_ops) {
429 break;
432 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
433 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
434 | (addr & iotlb.addr_mask));
435 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
436 if (!(iotlb.perm & (1 << is_write))) {
437 mr = &io_mem_unassigned;
438 break;
441 as = iotlb.target_as;
444 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
445 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
446 *plen = MIN(page, *plen);
449 *xlat = addr;
450 return mr;
453 /* Called from RCU critical section */
454 MemoryRegionSection *
455 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
456 hwaddr *xlat, hwaddr *plen)
458 MemoryRegionSection *section;
459 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
461 section = address_space_translate_internal(d, addr, xlat, plen, false);
463 assert(!section->mr->iommu_ops);
464 return section;
466 #endif
468 #if !defined(CONFIG_USER_ONLY)
470 static int cpu_common_post_load(void *opaque, int version_id)
472 CPUState *cpu = opaque;
474 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
475 version_id is increased. */
476 cpu->interrupt_request &= ~0x01;
477 tlb_flush(cpu, 1);
479 return 0;
482 static int cpu_common_pre_load(void *opaque)
484 CPUState *cpu = opaque;
486 cpu->exception_index = -1;
488 return 0;
491 static bool cpu_common_exception_index_needed(void *opaque)
493 CPUState *cpu = opaque;
495 return tcg_enabled() && cpu->exception_index != -1;
498 static const VMStateDescription vmstate_cpu_common_exception_index = {
499 .name = "cpu_common/exception_index",
500 .version_id = 1,
501 .minimum_version_id = 1,
502 .needed = cpu_common_exception_index_needed,
503 .fields = (VMStateField[]) {
504 VMSTATE_INT32(exception_index, CPUState),
505 VMSTATE_END_OF_LIST()
509 static bool cpu_common_crash_occurred_needed(void *opaque)
511 CPUState *cpu = opaque;
513 return cpu->crash_occurred;
516 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
517 .name = "cpu_common/crash_occurred",
518 .version_id = 1,
519 .minimum_version_id = 1,
520 .needed = cpu_common_crash_occurred_needed,
521 .fields = (VMStateField[]) {
522 VMSTATE_BOOL(crash_occurred, CPUState),
523 VMSTATE_END_OF_LIST()
527 const VMStateDescription vmstate_cpu_common = {
528 .name = "cpu_common",
529 .version_id = 1,
530 .minimum_version_id = 1,
531 .pre_load = cpu_common_pre_load,
532 .post_load = cpu_common_post_load,
533 .fields = (VMStateField[]) {
534 VMSTATE_UINT32(halted, CPUState),
535 VMSTATE_UINT32(interrupt_request, CPUState),
536 VMSTATE_END_OF_LIST()
538 .subsections = (const VMStateDescription*[]) {
539 &vmstate_cpu_common_exception_index,
540 &vmstate_cpu_common_crash_occurred,
541 NULL
545 #endif
547 CPUState *qemu_get_cpu(int index)
549 CPUState *cpu;
551 CPU_FOREACH(cpu) {
552 if (cpu->cpu_index == index) {
553 return cpu;
557 return NULL;
560 #if !defined(CONFIG_USER_ONLY)
561 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
563 CPUAddressSpace *newas;
565 /* Target code should have set num_ases before calling us */
566 assert(asidx < cpu->num_ases);
568 if (asidx == 0) {
569 /* address space 0 gets the convenience alias */
570 cpu->as = as;
573 /* KVM cannot currently support multiple address spaces. */
574 assert(asidx == 0 || !kvm_enabled());
576 if (!cpu->cpu_ases) {
577 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
580 newas = &cpu->cpu_ases[asidx];
581 newas->cpu = cpu;
582 newas->as = as;
583 if (tcg_enabled()) {
584 newas->tcg_as_listener.commit = tcg_commit;
585 memory_listener_register(&newas->tcg_as_listener, as);
589 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
591 /* Return the AddressSpace corresponding to the specified index */
592 return cpu->cpu_ases[asidx].as;
594 #endif
596 #ifndef CONFIG_USER_ONLY
597 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
599 static int cpu_get_free_index(Error **errp)
601 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
603 if (cpu >= MAX_CPUMASK_BITS) {
604 error_setg(errp, "Trying to use more CPUs than max of %d",
605 MAX_CPUMASK_BITS);
606 return -1;
609 bitmap_set(cpu_index_map, cpu, 1);
610 return cpu;
613 void cpu_exec_exit(CPUState *cpu)
615 if (cpu->cpu_index == -1) {
616 /* cpu_index was never allocated by this @cpu or was already freed. */
617 return;
620 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
621 cpu->cpu_index = -1;
623 #else
625 static int cpu_get_free_index(Error **errp)
627 CPUState *some_cpu;
628 int cpu_index = 0;
630 CPU_FOREACH(some_cpu) {
631 cpu_index++;
633 return cpu_index;
636 void cpu_exec_exit(CPUState *cpu)
639 #endif
641 void cpu_exec_init(CPUState *cpu, Error **errp)
643 CPUClass *cc = CPU_GET_CLASS(cpu);
644 int cpu_index;
645 Error *local_err = NULL;
647 cpu->as = NULL;
648 cpu->num_ases = 0;
650 #ifndef CONFIG_USER_ONLY
651 cpu->thread_id = qemu_get_thread_id();
653 /* This is a softmmu CPU object, so create a property for it
654 * so users can wire up its memory. (This can't go in qom/cpu.c
655 * because that file is compiled only once for both user-mode
656 * and system builds.) The default if no link is set up is to use
657 * the system address space.
659 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
660 (Object **)&cpu->memory,
661 qdev_prop_allow_set_link_before_realize,
662 OBJ_PROP_LINK_UNREF_ON_RELEASE,
663 &error_abort);
664 cpu->memory = system_memory;
665 object_ref(OBJECT(cpu->memory));
666 #endif
668 #if defined(CONFIG_USER_ONLY)
669 cpu_list_lock();
670 #endif
671 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
672 if (local_err) {
673 error_propagate(errp, local_err);
674 #if defined(CONFIG_USER_ONLY)
675 cpu_list_unlock();
676 #endif
677 return;
679 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
680 #if defined(CONFIG_USER_ONLY)
681 cpu_list_unlock();
682 #endif
683 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
684 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
686 if (cc->vmsd != NULL) {
687 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
691 #if defined(CONFIG_USER_ONLY)
692 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
694 tb_invalidate_phys_page_range(pc, pc + 1, 0);
696 #else
697 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
699 MemTxAttrs attrs;
700 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
701 int asidx = cpu_asidx_from_attrs(cpu, attrs);
702 if (phys != -1) {
703 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
704 phys | (pc & ~TARGET_PAGE_MASK));
707 #endif
709 #if defined(CONFIG_USER_ONLY)
710 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
715 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
716 int flags)
718 return -ENOSYS;
721 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
725 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
726 int flags, CPUWatchpoint **watchpoint)
728 return -ENOSYS;
730 #else
731 /* Add a watchpoint. */
732 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
733 int flags, CPUWatchpoint **watchpoint)
735 CPUWatchpoint *wp;
737 /* forbid ranges which are empty or run off the end of the address space */
738 if (len == 0 || (addr + len - 1) < addr) {
739 error_report("tried to set invalid watchpoint at %"
740 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
741 return -EINVAL;
743 wp = g_malloc(sizeof(*wp));
745 wp->vaddr = addr;
746 wp->len = len;
747 wp->flags = flags;
749 /* keep all GDB-injected watchpoints in front */
750 if (flags & BP_GDB) {
751 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
752 } else {
753 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
756 tlb_flush_page(cpu, addr);
758 if (watchpoint)
759 *watchpoint = wp;
760 return 0;
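/*
 * Illustrative use (added comment; the concrete call site is assumed): the
 * gdbstub installs a guest-debug write watchpoint roughly as
 *     cpu_watchpoint_insert(cpu, addr, len, BP_GDB | BP_MEM_WRITE, NULL);
 * which, thanks to the BP_GDB check above, keeps GDB-injected entries at the
 * head of cpu->watchpoints.
 */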
763 /* Remove a specific watchpoint. */
764 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
765 int flags)
767 CPUWatchpoint *wp;
769 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
770 if (addr == wp->vaddr && len == wp->len
771 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
772 cpu_watchpoint_remove_by_ref(cpu, wp);
773 return 0;
776 return -ENOENT;
779 /* Remove a specific watchpoint by reference. */
780 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
782 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
784 tlb_flush_page(cpu, watchpoint->vaddr);
786 g_free(watchpoint);
789 /* Remove all matching watchpoints. */
790 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
792 CPUWatchpoint *wp, *next;
794 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
795 if (wp->flags & mask) {
796 cpu_watchpoint_remove_by_ref(cpu, wp);
801 /* Return true if this watchpoint address matches the specified
802 * access (i.e. the address range covered by the watchpoint overlaps
803 * partially or completely with the address range covered by the
804 * access).
806 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
807 vaddr addr,
808 vaddr len)
810 /* We know the lengths are non-zero, but a little caution is
811 * required to avoid errors in the case where the range ends
812 * exactly at the top of the address space and so addr + len
813 * wraps round to zero.
815 vaddr wpend = wp->vaddr + wp->len - 1;
816 vaddr addrend = addr + len - 1;
818 return !(addr > wpend || wp->vaddr > addrend);
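/*
 * Worked example (added comment): a watchpoint at vaddr 0x1000 with len 4
 * covers [0x1000, 0x1003]; an access at addr 0x1002 with len 8 covers
 * [0x1002, 0x1009]. Neither range starts beyond the other's end, so the
 * function returns true.
 */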
821 #endif
823 /* Add a breakpoint. */
824 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
825 CPUBreakpoint **breakpoint)
827 CPUBreakpoint *bp;
829 bp = g_malloc(sizeof(*bp));
831 bp->pc = pc;
832 bp->flags = flags;
834 /* keep all GDB-injected breakpoints in front */
835 if (flags & BP_GDB) {
836 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
837 } else {
838 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
841 breakpoint_invalidate(cpu, pc);
843 if (breakpoint) {
844 *breakpoint = bp;
846 return 0;
849 /* Remove a specific breakpoint. */
850 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
852 CPUBreakpoint *bp;
854 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
855 if (bp->pc == pc && bp->flags == flags) {
856 cpu_breakpoint_remove_by_ref(cpu, bp);
857 return 0;
860 return -ENOENT;
863 /* Remove a specific breakpoint by reference. */
864 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
866 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
868 breakpoint_invalidate(cpu, breakpoint->pc);
870 g_free(breakpoint);
873 /* Remove all matching breakpoints. */
874 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
876 CPUBreakpoint *bp, *next;
878 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
879 if (bp->flags & mask) {
880 cpu_breakpoint_remove_by_ref(cpu, bp);
885 /* enable or disable single step mode. EXCP_DEBUG is returned by the
886 CPU loop after each instruction */
887 void cpu_single_step(CPUState *cpu, int enabled)
889 if (cpu->singlestep_enabled != enabled) {
890 cpu->singlestep_enabled = enabled;
891 if (kvm_enabled()) {
892 kvm_update_guest_debug(cpu, 0);
893 } else {
894 /* must flush all the translated code to avoid inconsistencies */
895 /* XXX: only flush what is necessary */
896 tb_flush(cpu);
901 void cpu_abort(CPUState *cpu, const char *fmt, ...)
903 va_list ap;
904 va_list ap2;
906 va_start(ap, fmt);
907 va_copy(ap2, ap);
908 fprintf(stderr, "qemu: fatal: ");
909 vfprintf(stderr, fmt, ap);
910 fprintf(stderr, "\n");
911 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
912 if (qemu_log_separate()) {
913 qemu_log("qemu: fatal: ");
914 qemu_log_vprintf(fmt, ap2);
915 qemu_log("\n");
916 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
917 qemu_log_flush();
918 qemu_log_close();
920 va_end(ap2);
921 va_end(ap);
922 replay_finish();
923 #if defined(CONFIG_USER_ONLY)
925 struct sigaction act;
926 sigfillset(&act.sa_mask);
927 act.sa_handler = SIG_DFL;
928 sigaction(SIGABRT, &act, NULL);
930 #endif
931 abort();
934 #if !defined(CONFIG_USER_ONLY)
935 /* Called from RCU critical section */
936 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
938 RAMBlock *block;
940 block = atomic_rcu_read(&ram_list.mru_block);
941 if (block && addr - block->offset < block->max_length) {
942 return block;
944 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
945 if (addr - block->offset < block->max_length) {
946 goto found;
950 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
951 abort();
953 found:
954 /* It is safe to write mru_block outside the iothread lock. This
955 * is what happens:
957 * mru_block = xxx
958 * rcu_read_unlock()
959 * xxx removed from list
960 * rcu_read_lock()
961 * read mru_block
962 * mru_block = NULL;
963 * call_rcu(reclaim_ramblock, xxx);
964 * rcu_read_unlock()
966 * atomic_rcu_set is not needed here. The block was already published
967 * when it was placed into the list. Here we're just making an extra
968 * copy of the pointer.
970 ram_list.mru_block = block;
971 return block;
974 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
976 CPUState *cpu;
977 ram_addr_t start1;
978 RAMBlock *block;
979 ram_addr_t end;
981 end = TARGET_PAGE_ALIGN(start + length);
982 start &= TARGET_PAGE_MASK;
984 rcu_read_lock();
985 block = qemu_get_ram_block(start);
986 assert(block == qemu_get_ram_block(end - 1));
987 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
988 CPU_FOREACH(cpu) {
989 tlb_reset_dirty(cpu, start1, length);
991 rcu_read_unlock();
994 /* Note: start and end must be within the same ram block. */
995 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
996 ram_addr_t length,
997 unsigned client)
999 DirtyMemoryBlocks *blocks;
1000 unsigned long end, page;
1001 bool dirty = false;
1003 if (length == 0) {
1004 return false;
1007 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1008 page = start >> TARGET_PAGE_BITS;
1010 rcu_read_lock();
1012 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1014 while (page < end) {
1015 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1016 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1017 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1019 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1020 offset, num);
1021 page += num;
1024 rcu_read_unlock();
1026 if (dirty && tcg_enabled()) {
1027 tlb_reset_dirty_range_all(start, length);
1030 return dirty;
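/*
 * Added note: the dirty bitmap is split into DIRTY_MEMORY_BLOCK_SIZE-page
 * chunks so that it can be grown RCU-safely (see dirty_memory_extend()
 * below); idx and offset above select the chunk and the bit range within it.
 */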
1033 /* Called from RCU critical section */
1034 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1035 MemoryRegionSection *section,
1036 target_ulong vaddr,
1037 hwaddr paddr, hwaddr xlat,
1038 int prot,
1039 target_ulong *address)
1041 hwaddr iotlb;
1042 CPUWatchpoint *wp;
1044 if (memory_region_is_ram(section->mr)) {
1045 /* Normal RAM. */
1046 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1047 + xlat;
1048 if (!section->readonly) {
1049 iotlb |= PHYS_SECTION_NOTDIRTY;
1050 } else {
1051 iotlb |= PHYS_SECTION_ROM;
1053 } else {
1054 AddressSpaceDispatch *d;
1056 d = atomic_rcu_read(&section->address_space->dispatch);
1057 iotlb = section - d->map.sections;
1058 iotlb += xlat;
1061 /* Make accesses to pages with watchpoints go via the
1062 watchpoint trap routines. */
1063 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1064 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1065 /* Avoid trapping reads of pages with a write breakpoint. */
1066 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1067 iotlb = PHYS_SECTION_WATCH + paddr;
1068 *address |= TLB_MMIO;
1069 break;
1074 return iotlb;
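/*
 * Added note: for RAM the value returned above is the page-aligned ram_addr
 * with the NOTDIRTY or ROM section number ORed into the low bits; for MMIO it
 * is the section's index in the dispatch map plus the translated offset
 * (xlat). This is why phys_section_add() asserts that sections_nb stays
 * below TARGET_PAGE_SIZE.
 */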
1076 #endif /* defined(CONFIG_USER_ONLY) */
1078 #if !defined(CONFIG_USER_ONLY)
1080 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1081 uint16_t section);
1082 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1084 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1085 qemu_anon_ram_alloc;
1088 * Set a custom physical guest memory allocator.
1089 * Accelerators with unusual needs may need this. Hopefully, we can
1090 * get rid of it eventually.
1092 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1094 phys_mem_alloc = alloc;
1097 static uint16_t phys_section_add(PhysPageMap *map,
1098 MemoryRegionSection *section)
1100 /* The physical section number is ORed with a page-aligned
1101 * pointer to produce the iotlb entries. Thus it should
1102 * never overflow into the page-aligned value.
1104 assert(map->sections_nb < TARGET_PAGE_SIZE);
1106 if (map->sections_nb == map->sections_nb_alloc) {
1107 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1108 map->sections = g_renew(MemoryRegionSection, map->sections,
1109 map->sections_nb_alloc);
1111 map->sections[map->sections_nb] = *section;
1112 memory_region_ref(section->mr);
1113 return map->sections_nb++;
1116 static void phys_section_destroy(MemoryRegion *mr)
1118 bool have_sub_page = mr->subpage;
1120 memory_region_unref(mr);
1122 if (have_sub_page) {
1123 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1124 object_unref(OBJECT(&subpage->iomem));
1125 g_free(subpage);
1129 static void phys_sections_free(PhysPageMap *map)
1131 while (map->sections_nb > 0) {
1132 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1133 phys_section_destroy(section->mr);
1135 g_free(map->sections);
1136 g_free(map->nodes);
1139 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1141 subpage_t *subpage;
1142 hwaddr base = section->offset_within_address_space
1143 & TARGET_PAGE_MASK;
1144 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1145 d->map.nodes, d->map.sections);
1146 MemoryRegionSection subsection = {
1147 .offset_within_address_space = base,
1148 .size = int128_make64(TARGET_PAGE_SIZE),
1150 hwaddr start, end;
1152 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1154 if (!(existing->mr->subpage)) {
1155 subpage = subpage_init(d->as, base);
1156 subsection.address_space = d->as;
1157 subsection.mr = &subpage->iomem;
1158 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1159 phys_section_add(&d->map, &subsection));
1160 } else {
1161 subpage = container_of(existing->mr, subpage_t, iomem);
1163 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1164 end = start + int128_get64(section->size) - 1;
1165 subpage_register(subpage, start, end,
1166 phys_section_add(&d->map, section));
1170 static void register_multipage(AddressSpaceDispatch *d,
1171 MemoryRegionSection *section)
1173 hwaddr start_addr = section->offset_within_address_space;
1174 uint16_t section_index = phys_section_add(&d->map, section);
1175 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1176 TARGET_PAGE_BITS));
1178 assert(num_pages);
1179 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1182 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1184 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1185 AddressSpaceDispatch *d = as->next_dispatch;
1186 MemoryRegionSection now = *section, remain = *section;
1187 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1189 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1190 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1191 - now.offset_within_address_space;
1193 now.size = int128_min(int128_make64(left), now.size);
1194 register_subpage(d, &now);
1195 } else {
1196 now.size = int128_zero();
1198 while (int128_ne(remain.size, now.size)) {
1199 remain.size = int128_sub(remain.size, now.size);
1200 remain.offset_within_address_space += int128_get64(now.size);
1201 remain.offset_within_region += int128_get64(now.size);
1202 now = remain;
1203 if (int128_lt(remain.size, page_size)) {
1204 register_subpage(d, &now);
1205 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1206 now.size = page_size;
1207 register_subpage(d, &now);
1208 } else {
1209 now.size = int128_and(now.size, int128_neg(page_size));
1210 register_multipage(d, &now);
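/*
 * Added note: mem_add() splits a MemoryRegionSection into an unaligned head
 * (registered as a subpage), a page-aligned middle (registered as full
 * pages), and an unaligned tail (another subpage), so the radix tree only
 * ever stores page-granular entries.
 */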
1215 void qemu_flush_coalesced_mmio_buffer(void)
1217 if (kvm_enabled())
1218 kvm_flush_coalesced_mmio_buffer();
1221 void qemu_mutex_lock_ramlist(void)
1223 qemu_mutex_lock(&ram_list.mutex);
1226 void qemu_mutex_unlock_ramlist(void)
1228 qemu_mutex_unlock(&ram_list.mutex);
1231 #ifdef __linux__
1232 static void *file_ram_alloc(RAMBlock *block,
1233 ram_addr_t memory,
1234 const char *path,
1235 Error **errp)
1237 bool unlink_on_error = false;
1238 char *filename;
1239 char *sanitized_name;
1240 char *c;
1241 void *area;
1242 int fd = -1;
1243 int64_t page_size;
1245 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1246 error_setg(errp,
1247 "host lacks kvm mmu notifiers, -mem-path unsupported");
1248 return NULL;
1251 for (;;) {
1252 fd = open(path, O_RDWR);
1253 if (fd >= 0) {
1254 /* @path names an existing file, use it */
1255 break;
1257 if (errno == ENOENT) {
1258 /* @path names a file that doesn't exist, create it */
1259 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1260 if (fd >= 0) {
1261 unlink_on_error = true;
1262 break;
1264 } else if (errno == EISDIR) {
1265 /* @path names a directory, create a file there */
1266 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1267 sanitized_name = g_strdup(memory_region_name(block->mr));
1268 for (c = sanitized_name; *c != '\0'; c++) {
1269 if (*c == '/') {
1270 *c = '_';
1274 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1275 sanitized_name);
1276 g_free(sanitized_name);
1278 fd = mkstemp(filename);
1279 if (fd >= 0) {
1280 unlink(filename);
1281 g_free(filename);
1282 break;
1284 g_free(filename);
1286 if (errno != EEXIST && errno != EINTR) {
1287 error_setg_errno(errp, errno,
1288 "can't open backing store %s for guest RAM",
1289 path);
1290 goto error;
1293 * Try again on EINTR and EEXIST. The latter happens when
1294 * something else creates the file between our two open().
1298 page_size = qemu_fd_getpagesize(fd);
1299 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1301 if (memory < page_size) {
1302 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1303 "or larger than page size 0x%" PRIx64,
1304 memory, page_size);
1305 goto error;
1308 memory = ROUND_UP(memory, page_size);
1311 * ftruncate is not supported by hugetlbfs in older
1312 * hosts, so don't bother bailing out on errors.
1313 * If anything goes wrong with it under other filesystems,
1314 * mmap will fail.
1316 if (ftruncate(fd, memory)) {
1317 perror("ftruncate");
1320 area = qemu_ram_mmap(fd, memory, block->mr->align,
1321 block->flags & RAM_SHARED);
1322 if (area == MAP_FAILED) {
1323 error_setg_errno(errp, errno,
1324 "unable to map backing store for guest RAM");
1325 goto error;
1328 if (mem_prealloc) {
1329 os_mem_prealloc(fd, area, memory);
1332 block->fd = fd;
1333 return area;
1335 error:
1336 if (unlink_on_error) {
1337 unlink(path);
1339 if (fd != -1) {
1340 close(fd);
1342 return NULL;
1344 #endif
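/*
 * Added usage note: file_ram_alloc() above backs guest RAM with a file, e.g.
 * when the guest is started with something like "-mem-path /dev/hugepages"
 * or a memory-backend-file object; @path may name an existing file, a file
 * to be created, or a directory in which a temporary backing file is made.
 */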
1346 /* Called with the ramlist lock held. */
1347 static ram_addr_t find_ram_offset(ram_addr_t size)
1349 RAMBlock *block, *next_block;
1350 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1352 assert(size != 0); /* it would hand out the same offset multiple times */
1354 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1355 return 0;
1358 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1359 ram_addr_t end, next = RAM_ADDR_MAX;
1361 end = block->offset + block->max_length;
1363 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1364 if (next_block->offset >= end) {
1365 next = MIN(next, next_block->offset);
1368 if (next - end >= size && next - end < mingap) {
1369 offset = end;
1370 mingap = next - end;
1374 if (offset == RAM_ADDR_MAX) {
1375 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1376 (uint64_t)size);
1377 abort();
1380 return offset;
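/*
 * Worked example (added comment): with existing blocks at [0, 2M) and
 * [6M, 8M), a request for 1M considers the gaps [2M, 6M) and [8M, ...),
 * keeps the smallest gap that still fits, and therefore returns 2M.
 */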
1383 ram_addr_t last_ram_offset(void)
1385 RAMBlock *block;
1386 ram_addr_t last = 0;
1388 rcu_read_lock();
1389 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1390 last = MAX(last, block->offset + block->max_length);
1392 rcu_read_unlock();
1393 return last;
1396 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1398 int ret;
1400 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1401 if (!machine_dump_guest_core(current_machine)) {
1402 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1403 if (ret) {
1404 perror("qemu_madvise");
1405 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1406 "but dump_guest_core=off specified\n");
1411 /* Called within an RCU critical section, or while the ramlist lock
1412 * is held.
1414 static RAMBlock *find_ram_block(ram_addr_t addr)
1416 RAMBlock *block;
1418 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1419 if (block->offset == addr) {
1420 return block;
1424 return NULL;
1427 const char *qemu_ram_get_idstr(RAMBlock *rb)
1429 return rb->idstr;
1432 /* Called with iothread lock held. */
1433 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1435 RAMBlock *new_block, *block;
1437 rcu_read_lock();
1438 new_block = find_ram_block(addr);
1439 assert(new_block);
1440 assert(!new_block->idstr[0]);
1442 if (dev) {
1443 char *id = qdev_get_dev_path(dev);
1444 if (id) {
1445 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1446 g_free(id);
1449 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1451 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1452 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1453 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1454 new_block->idstr);
1455 abort();
1458 rcu_read_unlock();
1461 /* Called with iothread lock held. */
1462 void qemu_ram_unset_idstr(ram_addr_t addr)
1464 RAMBlock *block;
1466 /* FIXME: arch_init.c assumes that this is not called throughout
1467 * migration. Ignore the problem since hot-unplug during migration
1468 * does not work anyway.
1471 rcu_read_lock();
1472 block = find_ram_block(addr);
1473 if (block) {
1474 memset(block->idstr, 0, sizeof(block->idstr));
1476 rcu_read_unlock();
1479 static int memory_try_enable_merging(void *addr, size_t len)
1481 if (!machine_mem_merge(current_machine)) {
1482 /* disabled by the user */
1483 return 0;
1486 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1489 /* Only legal before the guest might have detected the memory size: e.g. on
1490 * incoming migration, or right after reset.
1492 * As the memory core doesn't know how the memory is accessed, it is up to the
1493 * resize callback to update device state and/or add assertions to detect
1494 * misuse, if necessary.
1496 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1498 RAMBlock *block = find_ram_block(base);
1500 assert(block);
1502 newsize = HOST_PAGE_ALIGN(newsize);
1504 if (block->used_length == newsize) {
1505 return 0;
1508 if (!(block->flags & RAM_RESIZEABLE)) {
1509 error_setg_errno(errp, EINVAL,
1510 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1511 " in != 0x" RAM_ADDR_FMT, block->idstr,
1512 newsize, block->used_length);
1513 return -EINVAL;
1516 if (block->max_length < newsize) {
1517 error_setg_errno(errp, EINVAL,
1518 "Length too large: %s: 0x" RAM_ADDR_FMT
1519 " > 0x" RAM_ADDR_FMT, block->idstr,
1520 newsize, block->max_length);
1521 return -EINVAL;
1524 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1525 block->used_length = newsize;
1526 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1527 DIRTY_CLIENTS_ALL);
1528 memory_region_set_size(block->mr, newsize);
1529 if (block->resized) {
1530 block->resized(block->idstr, newsize, block->host);
1532 return 0;
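/*
 * Added note: only used_length (and the MemoryRegion's advertised size)
 * changes here; the underlying allocation is max_length, fixed at creation
 * time, which is why growing past it is rejected above and why only
 * RAM_RESIZEABLE blocks may be resized at all.
 */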
1535 /* Called with ram_list.mutex held */
1536 static void dirty_memory_extend(ram_addr_t old_ram_size,
1537 ram_addr_t new_ram_size)
1539 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1540 DIRTY_MEMORY_BLOCK_SIZE);
1541 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1542 DIRTY_MEMORY_BLOCK_SIZE);
1543 int i;
1545 /* Only need to extend if block count increased */
1546 if (new_num_blocks <= old_num_blocks) {
1547 return;
1550 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1551 DirtyMemoryBlocks *old_blocks;
1552 DirtyMemoryBlocks *new_blocks;
1553 int j;
1555 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1556 new_blocks = g_malloc(sizeof(*new_blocks) +
1557 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1559 if (old_num_blocks) {
1560 memcpy(new_blocks->blocks, old_blocks->blocks,
1561 old_num_blocks * sizeof(old_blocks->blocks[0]));
1564 for (j = old_num_blocks; j < new_num_blocks; j++) {
1565 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1568 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1570 if (old_blocks) {
1571 g_free_rcu(old_blocks, rcu);
1576 static void ram_block_add(RAMBlock *new_block, Error **errp)
1578 RAMBlock *block;
1579 RAMBlock *last_block = NULL;
1580 ram_addr_t old_ram_size, new_ram_size;
1581 Error *err = NULL;
1583 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1585 qemu_mutex_lock_ramlist();
1586 new_block->offset = find_ram_offset(new_block->max_length);
1588 if (!new_block->host) {
1589 if (xen_enabled()) {
1590 xen_ram_alloc(new_block->offset, new_block->max_length,
1591 new_block->mr, &err);
1592 if (err) {
1593 error_propagate(errp, err);
1594 qemu_mutex_unlock_ramlist();
1595 return;
1597 } else {
1598 new_block->host = phys_mem_alloc(new_block->max_length,
1599 &new_block->mr->align);
1600 if (!new_block->host) {
1601 error_setg_errno(errp, errno,
1602 "cannot set up guest memory '%s'",
1603 memory_region_name(new_block->mr));
1604 qemu_mutex_unlock_ramlist();
1605 return;
1607 memory_try_enable_merging(new_block->host, new_block->max_length);
1611 new_ram_size = MAX(old_ram_size,
1612 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1613 if (new_ram_size > old_ram_size) {
1614 migration_bitmap_extend(old_ram_size, new_ram_size);
1615 dirty_memory_extend(old_ram_size, new_ram_size);
1617 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1618 * QLIST (which has an RCU-friendly variant) does not have insertion at
1619 * tail, so save the last element in last_block.
1621 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1622 last_block = block;
1623 if (block->max_length < new_block->max_length) {
1624 break;
1627 if (block) {
1628 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1629 } else if (last_block) {
1630 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1631 } else { /* list is empty */
1632 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1634 ram_list.mru_block = NULL;
1636 /* Write list before version */
1637 smp_wmb();
1638 ram_list.version++;
1639 qemu_mutex_unlock_ramlist();
1641 cpu_physical_memory_set_dirty_range(new_block->offset,
1642 new_block->used_length,
1643 DIRTY_CLIENTS_ALL);
1645 if (new_block->host) {
1646 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1647 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1648 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1649 if (kvm_enabled()) {
1650 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1655 #ifdef __linux__
1656 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1657 bool share, const char *mem_path,
1658 Error **errp)
1660 RAMBlock *new_block;
1661 Error *local_err = NULL;
1663 if (xen_enabled()) {
1664 error_setg(errp, "-mem-path not supported with Xen");
1665 return NULL;
1668 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1670 * file_ram_alloc() needs to allocate just like
1671 * phys_mem_alloc, but we haven't bothered to provide
1672 * a hook there.
1674 error_setg(errp,
1675 "-mem-path not supported with this accelerator");
1676 return NULL;
1679 size = HOST_PAGE_ALIGN(size);
1680 new_block = g_malloc0(sizeof(*new_block));
1681 new_block->mr = mr;
1682 new_block->used_length = size;
1683 new_block->max_length = size;
1684 new_block->flags = share ? RAM_SHARED : 0;
1685 new_block->host = file_ram_alloc(new_block, size,
1686 mem_path, errp);
1687 if (!new_block->host) {
1688 g_free(new_block);
1689 return NULL;
1692 ram_block_add(new_block, &local_err);
1693 if (local_err) {
1694 g_free(new_block);
1695 error_propagate(errp, local_err);
1696 return NULL;
1698 return new_block;
1700 #endif
1702 static
1703 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1704 void (*resized)(const char*,
1705 uint64_t length,
1706 void *host),
1707 void *host, bool resizeable,
1708 MemoryRegion *mr, Error **errp)
1710 RAMBlock *new_block;
1711 Error *local_err = NULL;
1713 size = HOST_PAGE_ALIGN(size);
1714 max_size = HOST_PAGE_ALIGN(max_size);
1715 new_block = g_malloc0(sizeof(*new_block));
1716 new_block->mr = mr;
1717 new_block->resized = resized;
1718 new_block->used_length = size;
1719 new_block->max_length = max_size;
1720 assert(max_size >= size);
1721 new_block->fd = -1;
1722 new_block->host = host;
1723 if (host) {
1724 new_block->flags |= RAM_PREALLOC;
1726 if (resizeable) {
1727 new_block->flags |= RAM_RESIZEABLE;
1729 ram_block_add(new_block, &local_err);
1730 if (local_err) {
1731 g_free(new_block);
1732 error_propagate(errp, local_err);
1733 return NULL;
1735 return new_block;
1738 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1739 MemoryRegion *mr, Error **errp)
1741 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1744 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1746 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1749 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1750 void (*resized)(const char*,
1751 uint64_t length,
1752 void *host),
1753 MemoryRegion *mr, Error **errp)
1755 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
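/*
 * Added note: the three wrappers above differ only in what they pass to
 * qemu_ram_alloc_internal(): a caller-provided host pointer marks the block
 * RAM_PREALLOC, and resizeable == true marks it RAM_RESIZEABLE with
 * max_size >= size.
 */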
1758 static void reclaim_ramblock(RAMBlock *block)
1760 if (block->flags & RAM_PREALLOC) {
1762 } else if (xen_enabled()) {
1763 xen_invalidate_map_cache_entry(block->host);
1764 #ifndef _WIN32
1765 } else if (block->fd >= 0) {
1766 qemu_ram_munmap(block->host, block->max_length);
1767 close(block->fd);
1768 #endif
1769 } else {
1770 qemu_anon_ram_free(block->host, block->max_length);
1772 g_free(block);
1775 void qemu_ram_free(RAMBlock *block)
1777 if (!block) {
1778 return;
1781 qemu_mutex_lock_ramlist();
1782 QLIST_REMOVE_RCU(block, next);
1783 ram_list.mru_block = NULL;
1784 /* Write list before version */
1785 smp_wmb();
1786 ram_list.version++;
1787 call_rcu(block, reclaim_ramblock, rcu);
1788 qemu_mutex_unlock_ramlist();
1791 #ifndef _WIN32
1792 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1794 RAMBlock *block;
1795 ram_addr_t offset;
1796 int flags;
1797 void *area, *vaddr;
1799 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1800 offset = addr - block->offset;
1801 if (offset < block->max_length) {
1802 vaddr = ramblock_ptr(block, offset);
1803 if (block->flags & RAM_PREALLOC) {
1805 } else if (xen_enabled()) {
1806 abort();
1807 } else {
1808 flags = MAP_FIXED;
1809 if (block->fd >= 0) {
1810 flags |= (block->flags & RAM_SHARED ?
1811 MAP_SHARED : MAP_PRIVATE);
1812 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1813 flags, block->fd, offset);
1814 } else {
1816 * Remap needs to match alloc. Accelerators that
1817 * set phys_mem_alloc never remap. If they did,
1818 * we'd need a remap hook here.
1820 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1822 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1823 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1824 flags, -1, 0);
1826 if (area != vaddr) {
1827 fprintf(stderr, "Could not remap addr: "
1828 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1829 length, addr);
1830 exit(1);
1832 memory_try_enable_merging(vaddr, length);
1833 qemu_ram_setup_dump(vaddr, length);
1838 #endif /* !_WIN32 */
1840 int qemu_get_ram_fd(ram_addr_t addr)
1842 RAMBlock *block;
1843 int fd;
1845 rcu_read_lock();
1846 block = qemu_get_ram_block(addr);
1847 fd = block->fd;
1848 rcu_read_unlock();
1849 return fd;
1852 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1854 RAMBlock *block;
1856 rcu_read_lock();
1857 block = qemu_get_ram_block(addr);
1858 block->fd = fd;
1859 rcu_read_unlock();
1862 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1864 RAMBlock *block;
1865 void *ptr;
1867 rcu_read_lock();
1868 block = qemu_get_ram_block(addr);
1869 ptr = ramblock_ptr(block, 0);
1870 rcu_read_unlock();
1871 return ptr;
1874 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1875 * This should not be used for general purpose DMA. Use address_space_map
1876 * or address_space_rw instead. For local memory (e.g. video ram) that the
1877 * device owns, use memory_region_get_ram_ptr.
1879 * Called within RCU critical section.
1881 void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1883 RAMBlock *block = ram_block;
1885 if (block == NULL) {
1886 block = qemu_get_ram_block(addr);
1889 if (xen_enabled() && block->host == NULL) {
1890 /* We need to check if the requested address is in the RAM
1891 * because we don't want to map the entire memory in QEMU.
1892 * In that case just map until the end of the page.
1894 if (block->offset == 0) {
1895 return xen_map_cache(addr, 0, 0);
1898 block->host = xen_map_cache(block->offset, block->max_length, 1);
1900 return ramblock_ptr(block, addr - block->offset);
1903 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1904 * but takes a size argument.
1906 * Called within RCU critical section.
1908 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1909 hwaddr *size)
1911 RAMBlock *block = ram_block;
1912 ram_addr_t offset_inside_block;
1913 if (*size == 0) {
1914 return NULL;
1917 if (block == NULL) {
1918 block = qemu_get_ram_block(addr);
1920 offset_inside_block = addr - block->offset;
1921 *size = MIN(*size, block->max_length - offset_inside_block);
1923 if (xen_enabled() && block->host == NULL) {
1924 /* We need to check if the requested address is in the RAM
1925 * because we don't want to map the entire memory in QEMU.
1926 * In that case just map the requested area.
1928 if (block->offset == 0) {
1929 return xen_map_cache(addr, *size, 1);
1932 block->host = xen_map_cache(block->offset, block->max_length, 1);
1935 return ramblock_ptr(block, offset_inside_block);
1939 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1940 * in that RAMBlock.
1942 * ptr: Host pointer to look up
1943 * round_offset: If true round the result offset down to a page boundary
1944 * *ram_addr: set to result ram_addr
1945 * *offset: set to result offset within the RAMBlock
1947 * Returns: RAMBlock (or NULL if not found)
1949 * By the time this function returns, the returned pointer is not protected
1950 * by RCU anymore. If the caller is not within an RCU critical section and
1951 * does not hold the iothread lock, it must have other means of protecting the
1952 * pointer, such as a reference to the region that includes the incoming
1953 * ram_addr_t.
1955 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1956 ram_addr_t *ram_addr,
1957 ram_addr_t *offset)
1959 RAMBlock *block;
1960 uint8_t *host = ptr;
1962 if (xen_enabled()) {
1963 rcu_read_lock();
1964 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1965 block = qemu_get_ram_block(*ram_addr);
1966 if (block) {
1967 *offset = (host - block->host);
1969 rcu_read_unlock();
1970 return block;
1973 rcu_read_lock();
1974 block = atomic_rcu_read(&ram_list.mru_block);
1975 if (block && block->host && host - block->host < block->max_length) {
1976 goto found;
1979 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1980 /* This case happens when the block is not mapped. */
1981 if (block->host == NULL) {
1982 continue;
1984 if (host - block->host < block->max_length) {
1985 goto found;
1989 rcu_read_unlock();
1990 return NULL;
1992 found:
1993 *offset = (host - block->host);
1994 if (round_offset) {
1995 *offset &= TARGET_PAGE_MASK;
1997 *ram_addr = block->offset + *offset;
1998 rcu_read_unlock();
1999 return block;
2003 * Finds the named RAMBlock
2005 * name: The name of RAMBlock to find
2007 * Returns: RAMBlock (or NULL if not found)
2009 RAMBlock *qemu_ram_block_by_name(const char *name)
2011 RAMBlock *block;
2013 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2014 if (!strcmp(name, block->idstr)) {
2015 return block;
2019 return NULL;
2022 /* Some of the softmmu routines need to translate from a host pointer
2023 (typically a TLB entry) back to a ram offset. */
2024 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2026 RAMBlock *block;
2027 ram_addr_t offset; /* Not used */
2029 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2031 if (!block) {
2032 return NULL;
2035 return block->mr;
2038 /* Called within RCU critical section. */
2039 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2040 uint64_t val, unsigned size)
2042 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2043 tb_invalidate_phys_page_fast(ram_addr, size);
2045 switch (size) {
2046 case 1:
2047 stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2048 break;
2049 case 2:
2050 stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2051 break;
2052 case 4:
2053 stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2054 break;
2055 default:
2056 abort();
2058 /* Set both VGA and migration bits for simplicity and to remove
2059 * the notdirty callback faster.
2061 cpu_physical_memory_set_dirty_range(ram_addr, size,
2062 DIRTY_CLIENTS_NOCODE);
2063 /* we remove the notdirty callback only if the code has been
2064 flushed */
2065 if (!cpu_physical_memory_is_clean(ram_addr)) {
2066 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2070 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2071 unsigned size, bool is_write)
2073 return is_write;
2076 static const MemoryRegionOps notdirty_mem_ops = {
2077 .write = notdirty_mem_write,
2078 .valid.accepts = notdirty_mem_accepts,
2079 .endianness = DEVICE_NATIVE_ENDIAN,
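/*
 * Added note: writes to pages that are still clean for DIRTY_MEMORY_CODE are
 * routed through notdirty_mem_write above so that translated blocks can be
 * invalidated first; once the page is dirty again, tlb_set_dirty() lets
 * subsequent writes go straight to RAM.
 */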
2082 /* Generate a debug exception if a watchpoint has been hit. */
2083 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2085 CPUState *cpu = current_cpu;
2086 CPUClass *cc = CPU_GET_CLASS(cpu);
2087 CPUArchState *env = cpu->env_ptr;
2088 target_ulong pc, cs_base;
2089 target_ulong vaddr;
2090 CPUWatchpoint *wp;
2091 int cpu_flags;
2093 if (cpu->watchpoint_hit) {
2094 /* We re-entered the check after replacing the TB. Now raise
2095 * the debug interrupt so that it will trigger after the
2096 * current instruction. */
2097 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2098 return;
2100 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2101 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2102 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2103 && (wp->flags & flags)) {
2104 if (flags == BP_MEM_READ) {
2105 wp->flags |= BP_WATCHPOINT_HIT_READ;
2106 } else {
2107 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2109 wp->hitaddr = vaddr;
2110 wp->hitattrs = attrs;
2111 if (!cpu->watchpoint_hit) {
2112 if (wp->flags & BP_CPU &&
2113 !cc->debug_check_watchpoint(cpu, wp)) {
2114 wp->flags &= ~BP_WATCHPOINT_HIT;
2115 continue;
2117 cpu->watchpoint_hit = wp;
2118 tb_check_watchpoint(cpu);
2119 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2120 cpu->exception_index = EXCP_DEBUG;
2121 cpu_loop_exit(cpu);
2122 } else {
2123 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2124 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2125 cpu_resume_from_signal(cpu, NULL);
2128 } else {
2129 wp->flags &= ~BP_WATCHPOINT_HIT;
2134 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2135 so these check for a hit then pass through to the normal out-of-line
2136 phys routines. */
2137 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2138 unsigned size, MemTxAttrs attrs)
2140 MemTxResult res;
2141 uint64_t data;
2142 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2143 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2145 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2146 switch (size) {
2147 case 1:
2148 data = address_space_ldub(as, addr, attrs, &res);
2149 break;
2150 case 2:
2151 data = address_space_lduw(as, addr, attrs, &res);
2152 break;
2153 case 4:
2154 data = address_space_ldl(as, addr, attrs, &res);
2155 break;
2156 default: abort();
2158 *pdata = data;
2159 return res;
2162 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2163 uint64_t val, unsigned size,
2164 MemTxAttrs attrs)
2166 MemTxResult res;
2167 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2168 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2170 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2171 switch (size) {
2172 case 1:
2173 address_space_stb(as, addr, val, attrs, &res);
2174 break;
2175 case 2:
2176 address_space_stw(as, addr, val, attrs, &res);
2177 break;
2178 case 4:
2179 address_space_stl(as, addr, val, attrs, &res);
2180 break;
2181 default: abort();
2183 return res;
2186 static const MemoryRegionOps watch_mem_ops = {
2187 .read_with_attrs = watch_mem_read,
2188 .write_with_attrs = watch_mem_write,
2189 .endianness = DEVICE_NATIVE_ENDIAN,
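/* In outline: pages containing watchpoints are entered in the TLB so that
 * every load/store on them is funnelled through watch_mem_read() and
 * watch_mem_write().  Those call check_watchpoint(), which may exit the
 * CPU loop with EXCP_DEBUG (for BP_STOP_BEFORE_ACCESS) before the access
 * happens; otherwise the access completes through the normal
 * address_space_* helpers. */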
2192 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2193 unsigned len, MemTxAttrs attrs)
2195 subpage_t *subpage = opaque;
2196 uint8_t buf[8];
2197 MemTxResult res;
2199 #if defined(DEBUG_SUBPAGE)
2200 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2201 subpage, len, addr);
2202 #endif
2203 res = address_space_read(subpage->as, addr + subpage->base,
2204 attrs, buf, len);
2205 if (res) {
2206 return res;
2208 switch (len) {
2209 case 1:
2210 *data = ldub_p(buf);
2211 return MEMTX_OK;
2212 case 2:
2213 *data = lduw_p(buf);
2214 return MEMTX_OK;
2215 case 4:
2216 *data = ldl_p(buf);
2217 return MEMTX_OK;
2218 case 8:
2219 *data = ldq_p(buf);
2220 return MEMTX_OK;
2221 default:
2222 abort();
2226 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2227 uint64_t value, unsigned len, MemTxAttrs attrs)
2229 subpage_t *subpage = opaque;
2230 uint8_t buf[8];
2232 #if defined(DEBUG_SUBPAGE)
2233 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2234 " value %"PRIx64"\n",
2235 __func__, subpage, len, addr, value);
2236 #endif
2237 switch (len) {
2238 case 1:
2239 stb_p(buf, value);
2240 break;
2241 case 2:
2242 stw_p(buf, value);
2243 break;
2244 case 4:
2245 stl_p(buf, value);
2246 break;
2247 case 8:
2248 stq_p(buf, value);
2249 break;
2250 default:
2251 abort();
2253 return address_space_write(subpage->as, addr + subpage->base,
2254 attrs, buf, len);
2257 static bool subpage_accepts(void *opaque, hwaddr addr,
2258 unsigned len, bool is_write)
2260 subpage_t *subpage = opaque;
2261 #if defined(DEBUG_SUBPAGE)
2262 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2263 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2264 #endif
2266 return address_space_access_valid(subpage->as, addr + subpage->base,
2267 len, is_write);
2270 static const MemoryRegionOps subpage_ops = {
2271 .read_with_attrs = subpage_read,
2272 .write_with_attrs = subpage_write,
2273 .impl.min_access_size = 1,
2274 .impl.max_access_size = 8,
2275 .valid.min_access_size = 1,
2276 .valid.max_access_size = 8,
2277 .valid.accepts = subpage_accepts,
2278 .endianness = DEVICE_NATIVE_ENDIAN,
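/* In outline: a subpage covers a target page that is split between several
 * memory regions.  Each access is bounced through a small byte buffer and
 * re-issued with address_space_read()/address_space_write() at
 * subpage->base + addr, so every fragment is handled by its owning region. */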
2281 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2282 uint16_t section)
2284 int idx, eidx;
2286 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2287 return -1;
2288 idx = SUBPAGE_IDX(start);
2289 eidx = SUBPAGE_IDX(end);
2290 #if defined(DEBUG_SUBPAGE)
2291 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2292 __func__, mmio, start, end, idx, eidx, section);
2293 #endif
2294 for (; idx <= eidx; idx++) {
2295 mmio->sub_section[idx] = section;
2298 return 0;
2301 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2303 subpage_t *mmio;
2305 mmio = g_malloc0(sizeof(subpage_t));
2307 mmio->as = as;
2308 mmio->base = base;
2309 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2310 NULL, TARGET_PAGE_SIZE);
2311 mmio->iomem.subpage = true;
2312 #if defined(DEBUG_SUBPAGE)
2313 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2314 mmio, base, TARGET_PAGE_SIZE);
2315 #endif
2316 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2318 return mmio;
2321 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2322 MemoryRegion *mr)
2324 assert(as);
2325 MemoryRegionSection section = {
2326 .address_space = as,
2327 .mr = mr,
2328 .offset_within_address_space = 0,
2329 .offset_within_region = 0,
2330 .size = int128_2_64(),
2333 return phys_section_add(map, &section);
2336 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2338 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2339 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2340 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2341 MemoryRegionSection *sections = d->map.sections;
2343 return sections[index & ~TARGET_PAGE_MASK].mr;
2346 static void io_mem_init(void)
2348 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2349 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2350 NULL, UINT64_MAX);
2351 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2352 NULL, UINT64_MAX);
2353 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2354 NULL, UINT64_MAX);
2357 static void mem_begin(MemoryListener *listener)
2359 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2360 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2361 uint16_t n;
2363 n = dummy_section(&d->map, as, &io_mem_unassigned);
2364 assert(n == PHYS_SECTION_UNASSIGNED);
2365 n = dummy_section(&d->map, as, &io_mem_notdirty);
2366 assert(n == PHYS_SECTION_NOTDIRTY);
2367 n = dummy_section(&d->map, as, &io_mem_rom);
2368 assert(n == PHYS_SECTION_ROM);
2369 n = dummy_section(&d->map, as, &io_mem_watch);
2370 assert(n == PHYS_SECTION_WATCH);
2372 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2373 d->as = as;
2374 as->next_dispatch = d;
2377 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2379 phys_sections_free(&d->map);
2380 g_free(d);
2383 static void mem_commit(MemoryListener *listener)
2385 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2386 AddressSpaceDispatch *cur = as->dispatch;
2387 AddressSpaceDispatch *next = as->next_dispatch;
2389 phys_page_compact_all(next, next->map.nodes_nb);
2391 atomic_rcu_set(&as->dispatch, next);
2392 if (cur) {
2393 call_rcu(cur, address_space_dispatch_free, rcu);
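/* In outline: the dispatch tree is rebuilt in two phases.  mem_begin()
 * allocates a fresh AddressSpaceDispatch which mem_add() populates for each
 * section; mem_commit() then publishes it with atomic_rcu_set() and frees
 * the previous tree via call_rcu() once all RCU readers have left their
 * critical sections. */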
2397 static void tcg_commit(MemoryListener *listener)
2399 CPUAddressSpace *cpuas;
2400 AddressSpaceDispatch *d;
2402 /* since each CPU stores ram addresses in its TLB cache, we must
2403 reset the modified entries */
2404 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2405 cpu_reloading_memory_map();
2406 /* The CPU and TLB are protected by the iothread lock.
2407 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2408 * may have split the RCU critical section.
2410 d = atomic_rcu_read(&cpuas->as->dispatch);
2411 cpuas->memory_dispatch = d;
2412 tlb_flush(cpuas->cpu, 1);
2415 void address_space_init_dispatch(AddressSpace *as)
2417 as->dispatch = NULL;
2418 as->dispatch_listener = (MemoryListener) {
2419 .begin = mem_begin,
2420 .commit = mem_commit,
2421 .region_add = mem_add,
2422 .region_nop = mem_add,
2423 .priority = 0,
2425 memory_listener_register(&as->dispatch_listener, as);
2428 void address_space_unregister(AddressSpace *as)
2430 memory_listener_unregister(&as->dispatch_listener);
2433 void address_space_destroy_dispatch(AddressSpace *as)
2435 AddressSpaceDispatch *d = as->dispatch;
2437 atomic_rcu_set(&as->dispatch, NULL);
2438 if (d) {
2439 call_rcu(d, address_space_dispatch_free, rcu);
2443 static void memory_map_init(void)
2445 system_memory = g_malloc(sizeof(*system_memory));
2447 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2448 address_space_init(&address_space_memory, system_memory, "memory");
2450 system_io = g_malloc(sizeof(*system_io));
2451 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2452 65536);
2453 address_space_init(&address_space_io, system_io, "I/O");
2456 MemoryRegion *get_system_memory(void)
2458 return system_memory;
2461 MemoryRegion *get_system_io(void)
2463 return system_io;
2466 #endif /* !defined(CONFIG_USER_ONLY) */
2468 /* physical memory access (slow version, mainly for debug) */
2469 #if defined(CONFIG_USER_ONLY)
2470 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2471 uint8_t *buf, int len, int is_write)
2473 int l, flags;
2474 target_ulong page;
2475 void * p;
2477 while (len > 0) {
2478 page = addr & TARGET_PAGE_MASK;
2479 l = (page + TARGET_PAGE_SIZE) - addr;
2480 if (l > len)
2481 l = len;
2482 flags = page_get_flags(page);
2483 if (!(flags & PAGE_VALID))
2484 return -1;
2485 if (is_write) {
2486 if (!(flags & PAGE_WRITE))
2487 return -1;
2488 /* XXX: this code should not depend on lock_user */
2489 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2490 return -1;
2491 memcpy(p, buf, l);
2492 unlock_user(p, addr, l);
2493 } else {
2494 if (!(flags & PAGE_READ))
2495 return -1;
2496 /* XXX: this code should not depend on lock_user */
2497 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2498 return -1;
2499 memcpy(buf, p, l);
2500 unlock_user(p, addr, 0);
2502 len -= l;
2503 buf += l;
2504 addr += l;
2506 return 0;
2509 #else
2511 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2512 hwaddr length)
2514 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2515 /* No early return if dirty_log_mask is or becomes 0, because
2516 * cpu_physical_memory_set_dirty_range will still call
2517 * xen_modified_memory.
2519 if (dirty_log_mask) {
2520 dirty_log_mask =
2521 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2523 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2524 tb_invalidate_phys_range(addr, addr + length);
2525 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2527 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2530 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2532 unsigned access_size_max = mr->ops->valid.max_access_size;
2534 /* Regions are assumed to support 1-4 byte accesses unless
2535 otherwise specified. */
2536 if (access_size_max == 0) {
2537 access_size_max = 4;
2540 /* Bound the maximum access by the alignment of the address. */
2541 if (!mr->ops->impl.unaligned) {
2542 unsigned align_size_max = addr & -addr;
2543 if (align_size_max != 0 && align_size_max < access_size_max) {
2544 access_size_max = align_size_max;
2548 /* Don't attempt accesses larger than the maximum. */
2549 if (l > access_size_max) {
2550 l = access_size_max;
2552 l = pow2floor(l);
2554 return l;
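/* Worked example (illustrative values): for a region whose
 * valid.max_access_size is 4, a request of l = 8 at addr = 0x1006 is first
 * bounded by the address alignment (0x1006 & -0x1006 == 2, below the
 * maximum of 4) and then rounded down to a power of two, yielding a 2-byte
 * access; the loops in address_space_*_continue() issue the remaining
 * bytes in further iterations. */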
2557 static bool prepare_mmio_access(MemoryRegion *mr)
2559 bool unlocked = !qemu_mutex_iothread_locked();
2560 bool release_lock = false;
2562 if (unlocked && mr->global_locking) {
2563 qemu_mutex_lock_iothread();
2564 unlocked = false;
2565 release_lock = true;
2567 if (mr->flush_coalesced_mmio) {
2568 if (unlocked) {
2569 qemu_mutex_lock_iothread();
2571 qemu_flush_coalesced_mmio_buffer();
2572 if (unlocked) {
2573 qemu_mutex_unlock_iothread();
2577 return release_lock;
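/* Rationale, in outline: devices that still rely on the global iothread
 * lock have mr->global_locking set, so the lock is taken here and released
 * by the caller once the access completes; any coalesced MMIO buffer is
 * flushed first so queued writes are observed before the new access. */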
2580 /* Called within RCU critical section. */
2581 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2582 MemTxAttrs attrs,
2583 const uint8_t *buf,
2584 int len, hwaddr addr1,
2585 hwaddr l, MemoryRegion *mr)
2587 uint8_t *ptr;
2588 uint64_t val;
2589 MemTxResult result = MEMTX_OK;
2590 bool release_lock = false;
2592 for (;;) {
2593 if (!memory_access_is_direct(mr, true)) {
2594 release_lock |= prepare_mmio_access(mr);
2595 l = memory_access_size(mr, l, addr1);
2596 /* XXX: could force current_cpu to NULL to avoid
2597 potential bugs */
2598 switch (l) {
2599 case 8:
2600 /* 64 bit write access */
2601 val = ldq_p(buf);
2602 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2603 attrs);
2604 break;
2605 case 4:
2606 /* 32 bit write access */
2607 val = ldl_p(buf);
2608 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2609 attrs);
2610 break;
2611 case 2:
2612 /* 16 bit write access */
2613 val = lduw_p(buf);
2614 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2615 attrs);
2616 break;
2617 case 1:
2618 /* 8 bit write access */
2619 val = ldub_p(buf);
2620 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2621 attrs);
2622 break;
2623 default:
2624 abort();
2626 } else {
2627 addr1 += memory_region_get_ram_addr(mr);
2628 /* RAM case */
2629 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2630 memcpy(ptr, buf, l);
2631 invalidate_and_set_dirty(mr, addr1, l);
2634 if (release_lock) {
2635 qemu_mutex_unlock_iothread();
2636 release_lock = false;
2639 len -= l;
2640 buf += l;
2641 addr += l;
2643 if (!len) {
2644 break;
2647 l = len;
2648 mr = address_space_translate(as, addr, &addr1, &l, true);
2651 return result;
2654 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2655 const uint8_t *buf, int len)
2657 hwaddr l;
2658 hwaddr addr1;
2659 MemoryRegion *mr;
2660 MemTxResult result = MEMTX_OK;
2662 if (len > 0) {
2663 rcu_read_lock();
2664 l = len;
2665 mr = address_space_translate(as, addr, &addr1, &l, true);
2666 result = address_space_write_continue(as, addr, attrs, buf, len,
2667 addr1, l, mr);
2668 rcu_read_unlock();
2671 return result;
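/* Illustrative call (address and data are placeholders only):
 *
 *     uint8_t data[4] = { 0x12, 0x34, 0x56, 0x78 };
 *     MemTxResult r = address_space_write(&address_space_memory, 0x1000,
 *                                         MEMTXATTRS_UNSPECIFIED,
 *                                         data, sizeof(data));
 *     if (r != MEMTX_OK) {
 *         ... at least one subaccess failed ...
 *     }
 */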
2674 /* Called within RCU critical section. */
2675 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2676 MemTxAttrs attrs, uint8_t *buf,
2677 int len, hwaddr addr1, hwaddr l,
2678 MemoryRegion *mr)
2680 uint8_t *ptr;
2681 uint64_t val;
2682 MemTxResult result = MEMTX_OK;
2683 bool release_lock = false;
2685 for (;;) {
2686 if (!memory_access_is_direct(mr, false)) {
2687 /* I/O case */
2688 release_lock |= prepare_mmio_access(mr);
2689 l = memory_access_size(mr, l, addr1);
2690 switch (l) {
2691 case 8:
2692 /* 64 bit read access */
2693 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2694 attrs);
2695 stq_p(buf, val);
2696 break;
2697 case 4:
2698 /* 32 bit read access */
2699 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2700 attrs);
2701 stl_p(buf, val);
2702 break;
2703 case 2:
2704 /* 16 bit read access */
2705 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2706 attrs);
2707 stw_p(buf, val);
2708 break;
2709 case 1:
2710 /* 8 bit read access */
2711 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2712 attrs);
2713 stb_p(buf, val);
2714 break;
2715 default:
2716 abort();
2718 } else {
2719 /* RAM case */
2720 ptr = qemu_get_ram_ptr(mr->ram_block,
2721 memory_region_get_ram_addr(mr) + addr1);
2722 memcpy(buf, ptr, l);
2725 if (release_lock) {
2726 qemu_mutex_unlock_iothread();
2727 release_lock = false;
2730 len -= l;
2731 buf += l;
2732 addr += l;
2734 if (!len) {
2735 break;
2738 l = len;
2739 mr = address_space_translate(as, addr, &addr1, &l, false);
2742 return result;
2745 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2746 MemTxAttrs attrs, uint8_t *buf, int len)
2748 hwaddr l;
2749 hwaddr addr1;
2750 MemoryRegion *mr;
2751 MemTxResult result = MEMTX_OK;
2753 if (len > 0) {
2754 rcu_read_lock();
2755 l = len;
2756 mr = address_space_translate(as, addr, &addr1, &l, false);
2757 result = address_space_read_continue(as, addr, attrs, buf, len,
2758 addr1, l, mr);
2759 rcu_read_unlock();
2762 return result;
2765 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2766 uint8_t *buf, int len, bool is_write)
2768 if (is_write) {
2769 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2770 } else {
2771 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2775 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2776 int len, int is_write)
2778 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2779 buf, len, is_write);
2782 enum write_rom_type {
2783 WRITE_DATA,
2784 FLUSH_CACHE,
2787 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2788 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2790 hwaddr l;
2791 uint8_t *ptr;
2792 hwaddr addr1;
2793 MemoryRegion *mr;
2795 rcu_read_lock();
2796 while (len > 0) {
2797 l = len;
2798 mr = address_space_translate(as, addr, &addr1, &l, true);
2800 if (!(memory_region_is_ram(mr) ||
2801 memory_region_is_romd(mr))) {
2802 l = memory_access_size(mr, l, addr1);
2803 } else {
2804 addr1 += memory_region_get_ram_addr(mr);
2805 /* ROM/RAM case */
2806 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2807 switch (type) {
2808 case WRITE_DATA:
2809 memcpy(ptr, buf, l);
2810 invalidate_and_set_dirty(mr, addr1, l);
2811 break;
2812 case FLUSH_CACHE:
2813 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2814 break;
2817 len -= l;
2818 buf += l;
2819 addr += l;
2821 rcu_read_unlock();
2824 /* used for ROM loading: can write in RAM and ROM */
2825 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2826 const uint8_t *buf, int len)
2828 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2831 void cpu_flush_icache_range(hwaddr start, int len)
2834 * This function should do the same thing as an icache flush that was
2835 * triggered from within the guest. For TCG we are always cache coherent,
2836 * so there is no need to flush anything. For KVM / Xen we need to flush
2837 * the host's instruction cache at least.
2839 if (tcg_enabled()) {
2840 return;
2843 cpu_physical_memory_write_rom_internal(&address_space_memory,
2844 start, NULL, len, FLUSH_CACHE);
2847 typedef struct {
2848 MemoryRegion *mr;
2849 void *buffer;
2850 hwaddr addr;
2851 hwaddr len;
2852 bool in_use;
2853 } BounceBuffer;
2855 static BounceBuffer bounce;
2857 typedef struct MapClient {
2858 QEMUBH *bh;
2859 QLIST_ENTRY(MapClient) link;
2860 } MapClient;
2862 QemuMutex map_client_list_lock;
2863 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2864 = QLIST_HEAD_INITIALIZER(map_client_list);
2866 static void cpu_unregister_map_client_do(MapClient *client)
2868 QLIST_REMOVE(client, link);
2869 g_free(client);
2872 static void cpu_notify_map_clients_locked(void)
2874 MapClient *client;
2876 while (!QLIST_EMPTY(&map_client_list)) {
2877 client = QLIST_FIRST(&map_client_list);
2878 qemu_bh_schedule(client->bh);
2879 cpu_unregister_map_client_do(client);
2883 void cpu_register_map_client(QEMUBH *bh)
2885 MapClient *client = g_malloc(sizeof(*client));
2887 qemu_mutex_lock(&map_client_list_lock);
2888 client->bh = bh;
2889 QLIST_INSERT_HEAD(&map_client_list, client, link);
2890 if (!atomic_read(&bounce.in_use)) {
2891 cpu_notify_map_clients_locked();
2893 qemu_mutex_unlock(&map_client_list_lock);
2896 void cpu_exec_init_all(void)
2898 qemu_mutex_init(&ram_list.mutex);
2899 io_mem_init();
2900 memory_map_init();
2901 qemu_mutex_init(&map_client_list_lock);
2904 void cpu_unregister_map_client(QEMUBH *bh)
2906 MapClient *client;
2908 qemu_mutex_lock(&map_client_list_lock);
2909 QLIST_FOREACH(client, &map_client_list, link) {
2910 if (client->bh == bh) {
2911 cpu_unregister_map_client_do(client);
2912 break;
2915 qemu_mutex_unlock(&map_client_list_lock);
2918 static void cpu_notify_map_clients(void)
2920 qemu_mutex_lock(&map_client_list_lock);
2921 cpu_notify_map_clients_locked();
2922 qemu_mutex_unlock(&map_client_list_lock);
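/* In outline: there is a single static bounce buffer, so at most one
 * indirect mapping can be outstanding.  When address_space_map() finds it
 * busy it returns NULL; callers may register a bottom half with
 * cpu_register_map_client() and are notified through these helpers once
 * address_space_unmap() releases the buffer, at which point the map can be
 * retried. */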
2925 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2927 MemoryRegion *mr;
2928 hwaddr l, xlat;
2930 rcu_read_lock();
2931 while (len > 0) {
2932 l = len;
2933 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2934 if (!memory_access_is_direct(mr, is_write)) {
2935 l = memory_access_size(mr, l, addr);
2936 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2937 return false;
2941 len -= l;
2942 addr += l;
2944 rcu_read_unlock();
2945 return true;
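/* In outline: this walks the same translation as a real access but only
 * asks each non-direct region whether it would accept the access; for
 * example the subpage_accepts() callback above uses it to validate
 * forwarded subaccesses without performing them. */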
2948 /* Map a physical memory region into a host virtual address.
2949 * May map a subset of the requested range, given by and returned in *plen.
2950 * May return NULL if resources needed to perform the mapping are exhausted.
2951 * Use only for reads OR writes - not for read-modify-write operations.
2952 * Use cpu_register_map_client() to know when retrying the map operation is
2953 * likely to succeed.
2955 void *address_space_map(AddressSpace *as,
2956 hwaddr addr,
2957 hwaddr *plen,
2958 bool is_write)
2960 hwaddr len = *plen;
2961 hwaddr done = 0;
2962 hwaddr l, xlat, base;
2963 MemoryRegion *mr, *this_mr;
2964 ram_addr_t raddr;
2965 void *ptr;
2967 if (len == 0) {
2968 return NULL;
2971 l = len;
2972 rcu_read_lock();
2973 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2975 if (!memory_access_is_direct(mr, is_write)) {
2976 if (atomic_xchg(&bounce.in_use, true)) {
2977 rcu_read_unlock();
2978 return NULL;
2980 /* Avoid unbounded allocations */
2981 l = MIN(l, TARGET_PAGE_SIZE);
2982 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2983 bounce.addr = addr;
2984 bounce.len = l;
2986 memory_region_ref(mr);
2987 bounce.mr = mr;
2988 if (!is_write) {
2989 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2990 bounce.buffer, l);
2993 rcu_read_unlock();
2994 *plen = l;
2995 return bounce.buffer;
2998 base = xlat;
2999 raddr = memory_region_get_ram_addr(mr);
3001 for (;;) {
3002 len -= l;
3003 addr += l;
3004 done += l;
3005 if (len == 0) {
3006 break;
3009 l = len;
3010 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3011 if (this_mr != mr || xlat != base + done) {
3012 break;
3016 memory_region_ref(mr);
3017 *plen = done;
3018 ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3019 rcu_read_unlock();
3021 return ptr;
3024 /* Unmaps a memory region previously mapped by address_space_map().
3025 * Will also mark the memory as dirty if is_write == 1. access_len gives
3026 * the amount of memory that was actually read or written by the caller.
3028 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3029 int is_write, hwaddr access_len)
3031 if (buffer != bounce.buffer) {
3032 MemoryRegion *mr;
3033 ram_addr_t addr1;
3035 mr = qemu_ram_addr_from_host(buffer, &addr1);
3036 assert(mr != NULL);
3037 if (is_write) {
3038 invalidate_and_set_dirty(mr, addr1, access_len);
3040 if (xen_enabled()) {
3041 xen_invalidate_map_cache_entry(buffer);
3043 memory_region_unref(mr);
3044 return;
3046 if (is_write) {
3047 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3048 bounce.buffer, access_len);
3050 qemu_vfree(bounce.buffer);
3051 bounce.buffer = NULL;
3052 memory_region_unref(bounce.mr);
3053 atomic_mb_set(&bounce.in_use, false);
3054 cpu_notify_map_clients();
3057 void *cpu_physical_memory_map(hwaddr addr,
3058 hwaddr *plen,
3059 int is_write)
3061 return address_space_map(&address_space_memory, addr, plen, is_write);
3064 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3065 int is_write, hwaddr access_len)
3067 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
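/* Illustrative map/unmap pattern (sketch only; as, gpa, size and is_write
 * stand for the caller's values, error handling elided):
 *
 *     hwaddr plen = size;
 *     void *host = address_space_map(as, gpa, &plen, is_write);
 *     if (host) {
 *         ... access at most plen bytes at host ...
 *         address_space_unmap(as, host, plen, is_write, plen);
 *     } else {
 *         ... register a map client and retry later ...
 *     }
 */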
3070 /* warning: addr must be aligned */
3071 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3072 MemTxAttrs attrs,
3073 MemTxResult *result,
3074 enum device_endian endian)
3076 uint8_t *ptr;
3077 uint64_t val;
3078 MemoryRegion *mr;
3079 hwaddr l = 4;
3080 hwaddr addr1;
3081 MemTxResult r;
3082 bool release_lock = false;
3084 rcu_read_lock();
3085 mr = address_space_translate(as, addr, &addr1, &l, false);
3086 if (l < 4 || !memory_access_is_direct(mr, false)) {
3087 release_lock |= prepare_mmio_access(mr);
3089 /* I/O case */
3090 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3091 #if defined(TARGET_WORDS_BIGENDIAN)
3092 if (endian == DEVICE_LITTLE_ENDIAN) {
3093 val = bswap32(val);
3095 #else
3096 if (endian == DEVICE_BIG_ENDIAN) {
3097 val = bswap32(val);
3099 #endif
3100 } else {
3101 /* RAM case */
3102 ptr = qemu_get_ram_ptr(mr->ram_block,
3103 (memory_region_get_ram_addr(mr)
3104 & TARGET_PAGE_MASK)
3105 + addr1);
3106 switch (endian) {
3107 case DEVICE_LITTLE_ENDIAN:
3108 val = ldl_le_p(ptr);
3109 break;
3110 case DEVICE_BIG_ENDIAN:
3111 val = ldl_be_p(ptr);
3112 break;
3113 default:
3114 val = ldl_p(ptr);
3115 break;
3117 r = MEMTX_OK;
3119 if (result) {
3120 *result = r;
3122 if (release_lock) {
3123 qemu_mutex_unlock_iothread();
3125 rcu_read_unlock();
3126 return val;
3129 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3130 MemTxAttrs attrs, MemTxResult *result)
3132 return address_space_ldl_internal(as, addr, attrs, result,
3133 DEVICE_NATIVE_ENDIAN);
3136 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3137 MemTxAttrs attrs, MemTxResult *result)
3139 return address_space_ldl_internal(as, addr, attrs, result,
3140 DEVICE_LITTLE_ENDIAN);
3143 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3144 MemTxAttrs attrs, MemTxResult *result)
3146 return address_space_ldl_internal(as, addr, attrs, result,
3147 DEVICE_BIG_ENDIAN);
3150 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3152 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3155 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3157 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3160 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3162 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
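/* Illustrative use of the attribute-aware variant (placeholders only):
 *
 *     MemTxResult r;
 *     uint32_t v = address_space_ldl_le(as, addr,
 *                                       MEMTXATTRS_UNSPECIFIED, &r);
 *     if (r != MEMTX_OK) {
 *         ... the read did not complete normally ...
 *     }
 *
 * The ldl_phys()/ldl_*_phys() wrappers above are the same calls with
 * unspecified attributes and the result discarded. */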
3165 /* warning: addr must be aligned */
3166 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3167 MemTxAttrs attrs,
3168 MemTxResult *result,
3169 enum device_endian endian)
3171 uint8_t *ptr;
3172 uint64_t val;
3173 MemoryRegion *mr;
3174 hwaddr l = 8;
3175 hwaddr addr1;
3176 MemTxResult r;
3177 bool release_lock = false;
3179 rcu_read_lock();
3180 mr = address_space_translate(as, addr, &addr1, &l,
3181 false);
3182 if (l < 8 || !memory_access_is_direct(mr, false)) {
3183 release_lock |= prepare_mmio_access(mr);
3185 /* I/O case */
3186 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3187 #if defined(TARGET_WORDS_BIGENDIAN)
3188 if (endian == DEVICE_LITTLE_ENDIAN) {
3189 val = bswap64(val);
3191 #else
3192 if (endian == DEVICE_BIG_ENDIAN) {
3193 val = bswap64(val);
3195 #endif
3196 } else {
3197 /* RAM case */
3198 ptr = qemu_get_ram_ptr(mr->ram_block,
3199 (memory_region_get_ram_addr(mr)
3200 & TARGET_PAGE_MASK)
3201 + addr1);
3202 switch (endian) {
3203 case DEVICE_LITTLE_ENDIAN:
3204 val = ldq_le_p(ptr);
3205 break;
3206 case DEVICE_BIG_ENDIAN:
3207 val = ldq_be_p(ptr);
3208 break;
3209 default:
3210 val = ldq_p(ptr);
3211 break;
3213 r = MEMTX_OK;
3215 if (result) {
3216 *result = r;
3218 if (release_lock) {
3219 qemu_mutex_unlock_iothread();
3221 rcu_read_unlock();
3222 return val;
3225 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3226 MemTxAttrs attrs, MemTxResult *result)
3228 return address_space_ldq_internal(as, addr, attrs, result,
3229 DEVICE_NATIVE_ENDIAN);
3232 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3233 MemTxAttrs attrs, MemTxResult *result)
3235 return address_space_ldq_internal(as, addr, attrs, result,
3236 DEVICE_LITTLE_ENDIAN);
3239 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3240 MemTxAttrs attrs, MemTxResult *result)
3242 return address_space_ldq_internal(as, addr, attrs, result,
3243 DEVICE_BIG_ENDIAN);
3246 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3248 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3251 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3253 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3256 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3258 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3261 /* XXX: optimize */
3262 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3263 MemTxAttrs attrs, MemTxResult *result)
3265 uint8_t val;
3266 MemTxResult r;
3268 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3269 if (result) {
3270 *result = r;
3272 return val;
3275 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3277 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3280 /* warning: addr must be aligned */
3281 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3282 hwaddr addr,
3283 MemTxAttrs attrs,
3284 MemTxResult *result,
3285 enum device_endian endian)
3287 uint8_t *ptr;
3288 uint64_t val;
3289 MemoryRegion *mr;
3290 hwaddr l = 2;
3291 hwaddr addr1;
3292 MemTxResult r;
3293 bool release_lock = false;
3295 rcu_read_lock();
3296 mr = address_space_translate(as, addr, &addr1, &l,
3297 false);
3298 if (l < 2 || !memory_access_is_direct(mr, false)) {
3299 release_lock |= prepare_mmio_access(mr);
3301 /* I/O case */
3302 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3303 #if defined(TARGET_WORDS_BIGENDIAN)
3304 if (endian == DEVICE_LITTLE_ENDIAN) {
3305 val = bswap16(val);
3307 #else
3308 if (endian == DEVICE_BIG_ENDIAN) {
3309 val = bswap16(val);
3311 #endif
3312 } else {
3313 /* RAM case */
3314 ptr = qemu_get_ram_ptr(mr->ram_block,
3315 (memory_region_get_ram_addr(mr)
3316 & TARGET_PAGE_MASK)
3317 + addr1);
3318 switch (endian) {
3319 case DEVICE_LITTLE_ENDIAN:
3320 val = lduw_le_p(ptr);
3321 break;
3322 case DEVICE_BIG_ENDIAN:
3323 val = lduw_be_p(ptr);
3324 break;
3325 default:
3326 val = lduw_p(ptr);
3327 break;
3329 r = MEMTX_OK;
3331 if (result) {
3332 *result = r;
3334 if (release_lock) {
3335 qemu_mutex_unlock_iothread();
3337 rcu_read_unlock();
3338 return val;
3341 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3342 MemTxAttrs attrs, MemTxResult *result)
3344 return address_space_lduw_internal(as, addr, attrs, result,
3345 DEVICE_NATIVE_ENDIAN);
3348 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3349 MemTxAttrs attrs, MemTxResult *result)
3351 return address_space_lduw_internal(as, addr, attrs, result,
3352 DEVICE_LITTLE_ENDIAN);
3355 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3356 MemTxAttrs attrs, MemTxResult *result)
3358 return address_space_lduw_internal(as, addr, attrs, result,
3359 DEVICE_BIG_ENDIAN);
3362 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3364 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3367 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3369 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3372 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3374 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3377 /* warning: addr must be aligned. The ram page is not marked as dirty
3378 and the code inside is not invalidated. It is useful if the dirty
3379 bits are used to track modified PTEs */
3380 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3381 MemTxAttrs attrs, MemTxResult *result)
3383 uint8_t *ptr;
3384 MemoryRegion *mr;
3385 hwaddr l = 4;
3386 hwaddr addr1;
3387 MemTxResult r;
3388 uint8_t dirty_log_mask;
3389 bool release_lock = false;
3391 rcu_read_lock();
3392 mr = address_space_translate(as, addr, &addr1, &l,
3393 true);
3394 if (l < 4 || !memory_access_is_direct(mr, true)) {
3395 release_lock |= prepare_mmio_access(mr);
3397 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3398 } else {
3399 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3400 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3401 stl_p(ptr, val);
3403 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3404 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3405 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3406 r = MEMTX_OK;
3408 if (result) {
3409 *result = r;
3411 if (release_lock) {
3412 qemu_mutex_unlock_iothread();
3414 rcu_read_unlock();
3417 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3419 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3422 /* warning: addr must be aligned */
3423 static inline void address_space_stl_internal(AddressSpace *as,
3424 hwaddr addr, uint32_t val,
3425 MemTxAttrs attrs,
3426 MemTxResult *result,
3427 enum device_endian endian)
3429 uint8_t *ptr;
3430 MemoryRegion *mr;
3431 hwaddr l = 4;
3432 hwaddr addr1;
3433 MemTxResult r;
3434 bool release_lock = false;
3436 rcu_read_lock();
3437 mr = address_space_translate(as, addr, &addr1, &l,
3438 true);
3439 if (l < 4 || !memory_access_is_direct(mr, true)) {
3440 release_lock |= prepare_mmio_access(mr);
3442 #if defined(TARGET_WORDS_BIGENDIAN)
3443 if (endian == DEVICE_LITTLE_ENDIAN) {
3444 val = bswap32(val);
3446 #else
3447 if (endian == DEVICE_BIG_ENDIAN) {
3448 val = bswap32(val);
3450 #endif
3451 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3452 } else {
3453 /* RAM case */
3454 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3455 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3456 switch (endian) {
3457 case DEVICE_LITTLE_ENDIAN:
3458 stl_le_p(ptr, val);
3459 break;
3460 case DEVICE_BIG_ENDIAN:
3461 stl_be_p(ptr, val);
3462 break;
3463 default:
3464 stl_p(ptr, val);
3465 break;
3467 invalidate_and_set_dirty(mr, addr1, 4);
3468 r = MEMTX_OK;
3470 if (result) {
3471 *result = r;
3473 if (release_lock) {
3474 qemu_mutex_unlock_iothread();
3476 rcu_read_unlock();
3479 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3480 MemTxAttrs attrs, MemTxResult *result)
3482 address_space_stl_internal(as, addr, val, attrs, result,
3483 DEVICE_NATIVE_ENDIAN);
3486 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3487 MemTxAttrs attrs, MemTxResult *result)
3489 address_space_stl_internal(as, addr, val, attrs, result,
3490 DEVICE_LITTLE_ENDIAN);
3493 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3494 MemTxAttrs attrs, MemTxResult *result)
3496 address_space_stl_internal(as, addr, val, attrs, result,
3497 DEVICE_BIG_ENDIAN);
3500 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3502 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3505 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3507 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3510 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3512 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3515 /* XXX: optimize */
3516 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3517 MemTxAttrs attrs, MemTxResult *result)
3519 uint8_t v = val;
3520 MemTxResult r;
3522 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3523 if (result) {
3524 *result = r;
3528 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3530 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3533 /* warning: addr must be aligned */
3534 static inline void address_space_stw_internal(AddressSpace *as,
3535 hwaddr addr, uint32_t val,
3536 MemTxAttrs attrs,
3537 MemTxResult *result,
3538 enum device_endian endian)
3540 uint8_t *ptr;
3541 MemoryRegion *mr;
3542 hwaddr l = 2;
3543 hwaddr addr1;
3544 MemTxResult r;
3545 bool release_lock = false;
3547 rcu_read_lock();
3548 mr = address_space_translate(as, addr, &addr1, &l, true);
3549 if (l < 2 || !memory_access_is_direct(mr, true)) {
3550 release_lock |= prepare_mmio_access(mr);
3552 #if defined(TARGET_WORDS_BIGENDIAN)
3553 if (endian == DEVICE_LITTLE_ENDIAN) {
3554 val = bswap16(val);
3556 #else
3557 if (endian == DEVICE_BIG_ENDIAN) {
3558 val = bswap16(val);
3560 #endif
3561 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3562 } else {
3563 /* RAM case */
3564 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3565 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3566 switch (endian) {
3567 case DEVICE_LITTLE_ENDIAN:
3568 stw_le_p(ptr, val);
3569 break;
3570 case DEVICE_BIG_ENDIAN:
3571 stw_be_p(ptr, val);
3572 break;
3573 default:
3574 stw_p(ptr, val);
3575 break;
3577 invalidate_and_set_dirty(mr, addr1, 2);
3578 r = MEMTX_OK;
3580 if (result) {
3581 *result = r;
3583 if (release_lock) {
3584 qemu_mutex_unlock_iothread();
3586 rcu_read_unlock();
3589 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3590 MemTxAttrs attrs, MemTxResult *result)
3592 address_space_stw_internal(as, addr, val, attrs, result,
3593 DEVICE_NATIVE_ENDIAN);
3596 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3597 MemTxAttrs attrs, MemTxResult *result)
3599 address_space_stw_internal(as, addr, val, attrs, result,
3600 DEVICE_LITTLE_ENDIAN);
3603 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3604 MemTxAttrs attrs, MemTxResult *result)
3606 address_space_stw_internal(as, addr, val, attrs, result,
3607 DEVICE_BIG_ENDIAN);
3610 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3612 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3615 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3617 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3620 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3622 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3625 /* XXX: optimize */
3626 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3627 MemTxAttrs attrs, MemTxResult *result)
3629 MemTxResult r;
3630 val = tswap64(val);
3631 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3632 if (result) {
3633 *result = r;
3637 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3638 MemTxAttrs attrs, MemTxResult *result)
3640 MemTxResult r;
3641 val = cpu_to_le64(val);
3642 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3643 if (result) {
3644 *result = r;
3647 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3648 MemTxAttrs attrs, MemTxResult *result)
3650 MemTxResult r;
3651 val = cpu_to_be64(val);
3652 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3653 if (result) {
3654 *result = r;
3658 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3660 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3663 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3665 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3668 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3670 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3673 /* virtual memory access for debug (includes writing to ROM) */
3674 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3675 uint8_t *buf, int len, int is_write)
3677 int l;
3678 hwaddr phys_addr;
3679 target_ulong page;
3681 while (len > 0) {
3682 int asidx;
3683 MemTxAttrs attrs;
3685 page = addr & TARGET_PAGE_MASK;
3686 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3687 asidx = cpu_asidx_from_attrs(cpu, attrs);
3688 /* if no physical page mapped, return an error */
3689 if (phys_addr == -1)
3690 return -1;
3691 l = (page + TARGET_PAGE_SIZE) - addr;
3692 if (l > len)
3693 l = len;
3694 phys_addr += (addr & ~TARGET_PAGE_MASK);
3695 if (is_write) {
3696 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3697 phys_addr, buf, l);
3698 } else {
3699 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3700 MEMTXATTRS_UNSPECIFIED,
3701 buf, l, 0);
3703 len -= l;
3704 buf += l;
3705 addr += l;
3707 return 0;
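/* In outline: this is the debugger path (gdbstub, monitor).  Each page of
 * the virtual range is translated with cpu_get_phys_page_attrs_debug();
 * writes go through cpu_physical_memory_write_rom() so breakpoints can be
 * planted even in ROM, while reads use a plain address_space_rw(). */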
3711 * Allows code that needs to deal with migration bitmaps etc. to still be built
3712 * target-independent.
3714 size_t qemu_target_page_bits(void)
3716 return TARGET_PAGE_BITS;
3719 #endif
3722 * A helper function for the _utterly broken_ virtio device model to find out if
3723 * it's running on a big endian machine. Don't do this at home kids!
3725 bool target_words_bigendian(void);
3726 bool target_words_bigendian(void)
3728 #if defined(TARGET_WORDS_BIGENDIAN)
3729 return true;
3730 #else
3731 return false;
3732 #endif
3735 #ifndef CONFIG_USER_ONLY
3736 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3738 MemoryRegion *mr;
3739 hwaddr l = 1;
3740 bool res;
3742 rcu_read_lock();
3743 mr = address_space_translate(&address_space_memory,
3744 phys_addr, &phys_addr, &l, false);
3746 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3747 rcu_read_unlock();
3748 return res;
3751 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3753 RAMBlock *block;
3754 int ret = 0;
3756 rcu_read_lock();
3757 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3758 ret = func(block->idstr, block->host, block->offset,
3759 block->used_length, opaque);
3760 if (ret) {
3761 break;
3764 rcu_read_unlock();
3765 return ret;
3767 #endif