s390-ccw.img: rebuild image
[qemu.git] / exec.c
blobee45472cabdad2230fa2c268de076ec72da45a78
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. */
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #include <sys/mman.h>
23 #endif
25 #include "qemu/cutils.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #include "exec/memory.h"
40 #include "sysemu/dma.h"
41 #include "exec/address-spaces.h"
42 #if defined(CONFIG_USER_ONLY)
43 #include <qemu.h>
44 #else /* !CONFIG_USER_ONLY */
45 #include "sysemu/xen-mapcache.h"
46 #include "trace.h"
47 #endif
48 #include "exec/cpu-all.h"
49 #include "qemu/rcu_queue.h"
50 #include "qemu/main-loop.h"
51 #include "translate-all.h"
52 #include "sysemu/replay.h"
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
56 #include "exec/log.h"
58 #include "qemu/range.h"
59 #ifndef _WIN32
60 #include "qemu/mmap-alloc.h"
61 #endif
63 //#define DEBUG_SUBPAGE
65 #if !defined(CONFIG_USER_ONLY)
66 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
67 * are protected by the ramlist lock. */
69 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
71 static MemoryRegion *system_memory;
72 static MemoryRegion *system_io;
74 AddressSpace address_space_io;
75 AddressSpace address_space_memory;
77 MemoryRegion io_mem_rom, io_mem_notdirty;
78 static MemoryRegion io_mem_unassigned;
80 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
81 #define RAM_PREALLOC (1 << 0)
83 /* RAM is mmap-ed with MAP_SHARED */
84 #define RAM_SHARED (1 << 1)
86 /* Only a portion of RAM (used_length) is actually used, and migrated.
87 * This used_length size can change across reboots. */
89 #define RAM_RESIZEABLE (1 << 2)
91 #endif
93 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
94 /* current CPU in the current thread. It is only valid inside
95 cpu_exec() */
96 __thread CPUState *current_cpu;
97 /* 0 = Do not count executed instructions.
98 1 = Precise instruction counting.
99 2 = Adaptive rate instruction counting. */
100 int use_icount;
102 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One 32-bit slot of the physical page radix tree. */
struct PhysPageEntry {
    /* Number of levels to skip to reach the next one (in units of L2_SIZE);
     * 0 marks a leaf.
     */
    uint32_t skip : 6;
    /* When skip is 0: index into phys_sections.
     * Otherwise: index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
113 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
115 /* Size of the L2 (and L3, etc) page tables. */
116 #define ADDR_SPACE_BITS 64
118 #define P_L2_BITS 9
119 #define P_L2_SIZE (1 << P_L2_BITS)
121 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
123 typedef PhysPageEntry Node[P_L2_SIZE];
125 typedef struct PhysPageMap {
126 struct rcu_head rcu;
128 unsigned sections_nb;
129 unsigned sections_nb_alloc;
130 unsigned nodes_nb;
131 unsigned nodes_nb_alloc;
132 Node *nodes;
133 MemoryRegionSection *sections;
134 } PhysPageMap;
136 struct AddressSpaceDispatch {
137 struct rcu_head rcu;
139 MemoryRegionSection *mru_section;
140 /* This is a multi-level map on the physical address space.
141 * The bottom level has pointers to MemoryRegionSections.
143 PhysPageEntry phys_map;
144 PhysPageMap map;
145 AddressSpace *as;
148 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
149 typedef struct subpage_t {
150 MemoryRegion iomem;
151 AddressSpace *as;
152 hwaddr base;
153 uint16_t sub_section[TARGET_PAGE_SIZE];
154 } subpage_t;
156 #define PHYS_SECTION_UNASSIGNED 0
157 #define PHYS_SECTION_NOTDIRTY 1
158 #define PHYS_SECTION_ROM 2
159 #define PHYS_SECTION_WATCH 3
161 static void io_mem_init(void);
162 static void memory_map_init(void);
163 static void tcg_commit(MemoryListener *listener);
165 static MemoryRegion io_mem_watch;
168 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
169 * @cpu: the CPU whose AddressSpace this is
170 * @as: the AddressSpace itself
171 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
172 * @tcg_as_listener: listener for tracking changes to the AddressSpace
174 struct CPUAddressSpace {
175 CPUState *cpu;
176 AddressSpace *as;
177 struct AddressSpaceDispatch *memory_dispatch;
178 MemoryListener tcg_as_listener;
181 #endif
183 #if !defined(CONFIG_USER_ONLY)
185 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
187 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
188 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
189 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
190 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
194 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
196 unsigned i;
197 uint32_t ret;
198 PhysPageEntry e;
199 PhysPageEntry *p;
201 ret = map->nodes_nb++;
202 p = map->nodes[ret];
203 assert(ret != PHYS_MAP_NODE_NIL);
204 assert(ret != map->nodes_nb_alloc);
206 e.skip = leaf ? 0 : 1;
207 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
208 for (i = 0; i < P_L2_SIZE; ++i) {
209 memcpy(&p[i], &e, sizeof(e));
211 return ret;
214 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
215 hwaddr *index, hwaddr *nb, uint16_t leaf,
216 int level)
218 PhysPageEntry *p;
219 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
221 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
222 lp->ptr = phys_map_node_alloc(map, level == 0);
224 p = map->nodes[lp->ptr];
225 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
227 while (*nb && lp < &p[P_L2_SIZE]) {
228 if ((*index & (step - 1)) == 0 && *nb >= step) {
229 lp->skip = 0;
230 lp->ptr = leaf;
231 *index += step;
232 *nb -= step;
233 } else {
234 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
236 ++lp;
240 static void phys_page_set(AddressSpaceDispatch *d,
241 hwaddr index, hwaddr nb,
242 uint16_t leaf)
244 /* Wildly overreserve - it doesn't matter much. */
245 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
247 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
250 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
251 * and update our entry so we can skip it and go directly to the destination.
253 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
255 unsigned valid_ptr = P_L2_SIZE;
256 int valid = 0;
257 PhysPageEntry *p;
258 int i;
260 if (lp->ptr == PHYS_MAP_NODE_NIL) {
261 return;
264 p = nodes[lp->ptr];
265 for (i = 0; i < P_L2_SIZE; i++) {
266 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
267 continue;
270 valid_ptr = i;
271 valid++;
272 if (p[i].skip) {
273 phys_page_compact(&p[i], nodes, compacted);
277 /* We can only compress if there's only one child. */
278 if (valid != 1) {
279 return;
282 assert(valid_ptr < P_L2_SIZE);
284 /* Don't compress if it won't fit in the # of bits we have. */
285 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
286 return;
289 lp->ptr = p[valid_ptr].ptr;
290 if (!p[valid_ptr].skip) {
291 /* If our only child is a leaf, make this a leaf. */
292 /* By design, we should have made this node a leaf to begin with so we
293 * should never reach here.
294 * But since it's so simple to handle this, let's do it just in case we
295 * change this rule.
297 lp->skip = 0;
298 } else {
299 lp->skip += p[valid_ptr].skip;
303 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
305 DECLARE_BITMAP(compacted, nodes_nb);
307 if (d->phys_map.skip) {
308 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
312 static inline bool section_covers_addr(const MemoryRegionSection *section,
313 hwaddr addr)
315 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
316 * the section must cover the entire address space.
318 return section->size.hi ||
319 range_covers_byte(section->offset_within_address_space,
320 section->size.lo, addr);
323 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
324 Node *nodes, MemoryRegionSection *sections)
326 PhysPageEntry *p;
327 hwaddr index = addr >> TARGET_PAGE_BITS;
328 int i;
330 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
331 if (lp.ptr == PHYS_MAP_NODE_NIL) {
332 return &sections[PHYS_SECTION_UNASSIGNED];
334 p = nodes[lp.ptr];
335 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
338 if (section_covers_addr(&sections[lp.ptr], addr)) {
339 return &sections[lp.ptr];
340 } else {
341 return &sections[PHYS_SECTION_UNASSIGNED];
345 bool memory_region_is_unassigned(MemoryRegion *mr)
347 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
348 && mr != &io_mem_watch;
351 /* Called from RCU critical section */
352 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
353 hwaddr addr,
354 bool resolve_subpage)
356 MemoryRegionSection *section = atomic_read(&d->mru_section);
357 subpage_t *subpage;
358 bool update;
360 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
361 section_covers_addr(section, addr)) {
362 update = false;
363 } else {
364 section = phys_page_find(d->phys_map, addr, d->map.nodes,
365 d->map.sections);
366 update = true;
368 if (resolve_subpage && section->mr->subpage) {
369 subpage = container_of(section->mr, subpage_t, iomem);
370 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
372 if (update) {
373 atomic_set(&d->mru_section, section);
375 return section;
378 /* Called from RCU critical section */
379 static MemoryRegionSection *
380 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
381 hwaddr *plen, bool resolve_subpage)
383 MemoryRegionSection *section;
384 MemoryRegion *mr;
385 Int128 diff;
387 section = address_space_lookup_region(d, addr, resolve_subpage);
388 /* Compute offset within MemoryRegionSection */
389 addr -= section->offset_within_address_space;
391 /* Compute offset within MemoryRegion */
392 *xlat = addr + section->offset_within_region;
394 mr = section->mr;
396 /* MMIO registers can be expected to perform full-width accesses based only
397 * on their address, without considering adjacent registers that could
398 * decode to completely different MemoryRegions. When such registers
399 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
400 * regions overlap wildly. For this reason we cannot clamp the accesses
401 * here.
403 * If the length is small (as is the case for address_space_ldl/stl),
404 * everything works fine. If the incoming length is large, however,
405 * the caller really has to do the clamping through memory_access_size.
407 if (memory_region_is_ram(mr)) {
408 diff = int128_sub(section->size, int128_make64(addr));
409 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
411 return section;
414 /* Called from RCU critical section */
415 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
416 hwaddr *xlat, hwaddr *plen,
417 bool is_write)
419 IOMMUTLBEntry iotlb;
420 MemoryRegionSection *section;
421 MemoryRegion *mr;
423 for (;;) {
424 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
425 section = address_space_translate_internal(d, addr, &addr, plen, true);
426 mr = section->mr;
428 if (!mr->iommu_ops) {
429 break;
432 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
433 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
434 | (addr & iotlb.addr_mask));
435 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
436 if (!(iotlb.perm & (1 << is_write))) {
437 mr = &io_mem_unassigned;
438 break;
441 as = iotlb.target_as;
444 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
445 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
446 *plen = MIN(page, *plen);
449 *xlat = addr;
450 return mr;
453 /* Called from RCU critical section */
454 MemoryRegionSection *
455 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
456 hwaddr *xlat, hwaddr *plen)
458 MemoryRegionSection *section;
459 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
461 section = address_space_translate_internal(d, addr, xlat, plen, false);
463 assert(!section->mr->iommu_ops);
464 return section;
466 #endif
468 #if !defined(CONFIG_USER_ONLY)
470 static int cpu_common_post_load(void *opaque, int version_id)
472 CPUState *cpu = opaque;
474 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
475 version_id is increased. */
476 cpu->interrupt_request &= ~0x01;
477 tlb_flush(cpu, 1);
479 return 0;
482 static int cpu_common_pre_load(void *opaque)
484 CPUState *cpu = opaque;
486 cpu->exception_index = -1;
488 return 0;
491 static bool cpu_common_exception_index_needed(void *opaque)
493 CPUState *cpu = opaque;
495 return tcg_enabled() && cpu->exception_index != -1;
498 static const VMStateDescription vmstate_cpu_common_exception_index = {
499 .name = "cpu_common/exception_index",
500 .version_id = 1,
501 .minimum_version_id = 1,
502 .needed = cpu_common_exception_index_needed,
503 .fields = (VMStateField[]) {
504 VMSTATE_INT32(exception_index, CPUState),
505 VMSTATE_END_OF_LIST()
509 static bool cpu_common_crash_occurred_needed(void *opaque)
511 CPUState *cpu = opaque;
513 return cpu->crash_occurred;
516 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
517 .name = "cpu_common/crash_occurred",
518 .version_id = 1,
519 .minimum_version_id = 1,
520 .needed = cpu_common_crash_occurred_needed,
521 .fields = (VMStateField[]) {
522 VMSTATE_BOOL(crash_occurred, CPUState),
523 VMSTATE_END_OF_LIST()
527 const VMStateDescription vmstate_cpu_common = {
528 .name = "cpu_common",
529 .version_id = 1,
530 .minimum_version_id = 1,
531 .pre_load = cpu_common_pre_load,
532 .post_load = cpu_common_post_load,
533 .fields = (VMStateField[]) {
534 VMSTATE_UINT32(halted, CPUState),
535 VMSTATE_UINT32(interrupt_request, CPUState),
536 VMSTATE_END_OF_LIST()
538 .subsections = (const VMStateDescription*[]) {
539 &vmstate_cpu_common_exception_index,
540 &vmstate_cpu_common_crash_occurred,
541 NULL
545 #endif
547 CPUState *qemu_get_cpu(int index)
549 CPUState *cpu;
551 CPU_FOREACH(cpu) {
552 if (cpu->cpu_index == index) {
553 return cpu;
557 return NULL;
560 #if !defined(CONFIG_USER_ONLY)
561 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
563 CPUAddressSpace *newas;
565 /* Target code should have set num_ases before calling us */
566 assert(asidx < cpu->num_ases);
568 if (asidx == 0) {
569 /* address space 0 gets the convenience alias */
570 cpu->as = as;
573 /* KVM cannot currently support multiple address spaces. */
574 assert(asidx == 0 || !kvm_enabled());
576 if (!cpu->cpu_ases) {
577 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
580 newas = &cpu->cpu_ases[asidx];
581 newas->cpu = cpu;
582 newas->as = as;
583 if (tcg_enabled()) {
584 newas->tcg_as_listener.commit = tcg_commit;
585 memory_listener_register(&newas->tcg_as_listener, as);
589 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
591 /* Return the AddressSpace corresponding to the specified index */
592 return cpu->cpu_ases[asidx].as;
594 #endif
596 #ifndef CONFIG_USER_ONLY
597 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
599 static int cpu_get_free_index(Error **errp)
601 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
603 if (cpu >= MAX_CPUMASK_BITS) {
604 error_setg(errp, "Trying to use more CPUs than max of %d",
605 MAX_CPUMASK_BITS);
606 return -1;
609 bitmap_set(cpu_index_map, cpu, 1);
610 return cpu;
613 void cpu_exec_exit(CPUState *cpu)
615 if (cpu->cpu_index == -1) {
616 /* cpu_index was never allocated by this @cpu or was already freed. */
617 return;
620 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
621 cpu->cpu_index = -1;
623 #else
625 static int cpu_get_free_index(Error **errp)
627 CPUState *some_cpu;
628 int cpu_index = 0;
630 CPU_FOREACH(some_cpu) {
631 cpu_index++;
633 return cpu_index;
636 void cpu_exec_exit(CPUState *cpu)
639 #endif
641 void cpu_exec_init(CPUState *cpu, Error **errp)
643 CPUClass *cc = CPU_GET_CLASS(cpu);
644 int cpu_index;
645 Error *local_err = NULL;
647 cpu->as = NULL;
648 cpu->num_ases = 0;
650 #ifndef CONFIG_USER_ONLY
651 cpu->thread_id = qemu_get_thread_id();
653 /* This is a softmmu CPU object, so create a property for it
654 * so users can wire up its memory. (This can't go in qom/cpu.c
655 * because that file is compiled only once for both user-mode
656 * and system builds.) The default if no link is set up is to use
657 * the system address space.
659 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
660 (Object **)&cpu->memory,
661 qdev_prop_allow_set_link_before_realize,
662 OBJ_PROP_LINK_UNREF_ON_RELEASE,
663 &error_abort);
664 cpu->memory = system_memory;
665 object_ref(OBJECT(cpu->memory));
666 #endif
668 #if defined(CONFIG_USER_ONLY)
669 cpu_list_lock();
670 #endif
671 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
672 if (local_err) {
673 error_propagate(errp, local_err);
674 #if defined(CONFIG_USER_ONLY)
675 cpu_list_unlock();
676 #endif
677 return;
679 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
680 #if defined(CONFIG_USER_ONLY)
681 cpu_list_unlock();
682 #endif
683 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
684 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
686 if (cc->vmsd != NULL) {
687 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
691 #if defined(CONFIG_USER_ONLY)
692 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
694 tb_invalidate_phys_page_range(pc, pc + 1, 0);
696 #else
697 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
699 MemTxAttrs attrs;
700 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
701 int asidx = cpu_asidx_from_attrs(cpu, attrs);
702 if (phys != -1) {
703 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
704 phys | (pc & ~TARGET_PAGE_MASK));
707 #endif
709 #if defined(CONFIG_USER_ONLY)
710 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
715 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
716 int flags)
718 return -ENOSYS;
721 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
725 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
726 int flags, CPUWatchpoint **watchpoint)
728 return -ENOSYS;
730 #else
731 /* Add a watchpoint. */
732 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
733 int flags, CPUWatchpoint **watchpoint)
735 CPUWatchpoint *wp;
737 /* forbid ranges which are empty or run off the end of the address space */
738 if (len == 0 || (addr + len - 1) < addr) {
739 error_report("tried to set invalid watchpoint at %"
740 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
741 return -EINVAL;
743 wp = g_malloc(sizeof(*wp));
745 wp->vaddr = addr;
746 wp->len = len;
747 wp->flags = flags;
749 /* keep all GDB-injected watchpoints in front */
750 if (flags & BP_GDB) {
751 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
752 } else {
753 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
756 tlb_flush_page(cpu, addr);
758 if (watchpoint)
759 *watchpoint = wp;
760 return 0;
763 /* Remove a specific watchpoint. */
764 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
765 int flags)
767 CPUWatchpoint *wp;
769 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
770 if (addr == wp->vaddr && len == wp->len
771 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
772 cpu_watchpoint_remove_by_ref(cpu, wp);
773 return 0;
776 return -ENOENT;
779 /* Remove a specific watchpoint by reference. */
780 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
782 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
784 tlb_flush_page(cpu, watchpoint->vaddr);
786 g_free(watchpoint);
789 /* Remove all matching watchpoints. */
790 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
792 CPUWatchpoint *wp, *next;
794 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
795 if (wp->flags & mask) {
796 cpu_watchpoint_remove_by_ref(cpu, wp);
801 /* Return true if this watchpoint address matches the specified
802 * access (ie the address range covered by the watchpoint overlaps
803 * partially or completely with the address range covered by the
804 * access).
806 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
807 vaddr addr,
808 vaddr len)
810 /* We know the lengths are non-zero, but a little caution is
811 * required to avoid errors in the case where the range ends
812 * exactly at the top of the address space and so addr + len
813 * wraps round to zero.
815 vaddr wpend = wp->vaddr + wp->len - 1;
816 vaddr addrend = addr + len - 1;
818 return !(addr > wpend || wp->vaddr > addrend);
821 #endif
823 /* Add a breakpoint. */
824 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
825 CPUBreakpoint **breakpoint)
827 CPUBreakpoint *bp;
829 bp = g_malloc(sizeof(*bp));
831 bp->pc = pc;
832 bp->flags = flags;
834 /* keep all GDB-injected breakpoints in front */
835 if (flags & BP_GDB) {
836 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
837 } else {
838 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
841 breakpoint_invalidate(cpu, pc);
843 if (breakpoint) {
844 *breakpoint = bp;
846 return 0;
849 /* Remove a specific breakpoint. */
850 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
852 CPUBreakpoint *bp;
854 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
855 if (bp->pc == pc && bp->flags == flags) {
856 cpu_breakpoint_remove_by_ref(cpu, bp);
857 return 0;
860 return -ENOENT;
863 /* Remove a specific breakpoint by reference. */
864 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
866 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
868 breakpoint_invalidate(cpu, breakpoint->pc);
870 g_free(breakpoint);
873 /* Remove all matching breakpoints. */
874 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
876 CPUBreakpoint *bp, *next;
878 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
879 if (bp->flags & mask) {
880 cpu_breakpoint_remove_by_ref(cpu, bp);
885 /* enable or disable single step mode. EXCP_DEBUG is returned by the
886 CPU loop after each instruction */
887 void cpu_single_step(CPUState *cpu, int enabled)
889 if (cpu->singlestep_enabled != enabled) {
890 cpu->singlestep_enabled = enabled;
891 if (kvm_enabled()) {
892 kvm_update_guest_debug(cpu, 0);
893 } else {
894 /* must flush all the translated code to avoid inconsistencies */
895 /* XXX: only flush what is necessary */
896 tb_flush(cpu);
901 void cpu_abort(CPUState *cpu, const char *fmt, ...)
903 va_list ap;
904 va_list ap2;
906 va_start(ap, fmt);
907 va_copy(ap2, ap);
908 fprintf(stderr, "qemu: fatal: ");
909 vfprintf(stderr, fmt, ap);
910 fprintf(stderr, "\n");
911 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
912 if (qemu_log_separate()) {
913 qemu_log("qemu: fatal: ");
914 qemu_log_vprintf(fmt, ap2);
915 qemu_log("\n");
916 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
917 qemu_log_flush();
918 qemu_log_close();
920 va_end(ap2);
921 va_end(ap);
922 replay_finish();
923 #if defined(CONFIG_USER_ONLY)
925 struct sigaction act;
926 sigfillset(&act.sa_mask);
927 act.sa_handler = SIG_DFL;
928 sigaction(SIGABRT, &act, NULL);
930 #endif
931 abort();
934 #if !defined(CONFIG_USER_ONLY)
935 /* Called from RCU critical section */
936 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
938 RAMBlock *block;
940 block = atomic_rcu_read(&ram_list.mru_block);
941 if (block && addr - block->offset < block->max_length) {
942 return block;
944 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
945 if (addr - block->offset < block->max_length) {
946 goto found;
950 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
951 abort();
953 found:
954 /* It is safe to write mru_block outside the iothread lock. This
955 * is what happens:
957 * mru_block = xxx
958 * rcu_read_unlock()
959 * xxx removed from list
960 * rcu_read_lock()
961 * read mru_block
962 * mru_block = NULL;
963 * call_rcu(reclaim_ramblock, xxx);
964 * rcu_read_unlock()
966 * atomic_rcu_set is not needed here. The block was already published
967 * when it was placed into the list. Here we're just making an extra
968 * copy of the pointer.
970 ram_list.mru_block = block;
971 return block;
974 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
976 CPUState *cpu;
977 ram_addr_t start1;
978 RAMBlock *block;
979 ram_addr_t end;
981 end = TARGET_PAGE_ALIGN(start + length);
982 start &= TARGET_PAGE_MASK;
984 rcu_read_lock();
985 block = qemu_get_ram_block(start);
986 assert(block == qemu_get_ram_block(end - 1));
987 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
988 CPU_FOREACH(cpu) {
989 tlb_reset_dirty(cpu, start1, length);
991 rcu_read_unlock();
994 /* Note: start and end must be within the same ram block. */
995 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
996 ram_addr_t length,
997 unsigned client)
999 DirtyMemoryBlocks *blocks;
1000 unsigned long end, page;
1001 bool dirty = false;
1003 if (length == 0) {
1004 return false;
1007 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1008 page = start >> TARGET_PAGE_BITS;
1010 rcu_read_lock();
1012 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1014 while (page < end) {
1015 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1016 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1017 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1019 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1020 offset, num);
1021 page += num;
1024 rcu_read_unlock();
1026 if (dirty && tcg_enabled()) {
1027 tlb_reset_dirty_range_all(start, length);
1030 return dirty;
1033 /* Called from RCU critical section */
1034 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1035 MemoryRegionSection *section,
1036 target_ulong vaddr,
1037 hwaddr paddr, hwaddr xlat,
1038 int prot,
1039 target_ulong *address)
1041 hwaddr iotlb;
1042 CPUWatchpoint *wp;
1044 if (memory_region_is_ram(section->mr)) {
1045 /* Normal RAM. */
1046 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1047 + xlat;
1048 if (!section->readonly) {
1049 iotlb |= PHYS_SECTION_NOTDIRTY;
1050 } else {
1051 iotlb |= PHYS_SECTION_ROM;
1053 } else {
1054 AddressSpaceDispatch *d;
1056 d = atomic_rcu_read(&section->address_space->dispatch);
1057 iotlb = section - d->map.sections;
1058 iotlb += xlat;
1061 /* Make accesses to pages with watchpoints go via the
1062 watchpoint trap routines. */
1063 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1064 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1065 /* Avoid trapping reads of pages with a write breakpoint. */
1066 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1067 iotlb = PHYS_SECTION_WATCH + paddr;
1068 *address |= TLB_MMIO;
1069 break;
1074 return iotlb;
1076 #endif /* defined(CONFIG_USER_ONLY) */
1078 #if !defined(CONFIG_USER_ONLY)
1080 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1081 uint16_t section);
1082 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1084 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1085 qemu_anon_ram_alloc;
1088 * Set a custom physical guest memory alloator.
1089 * Accelerators with unusual needs may need this. Hopefully, we can
1090 * get rid of it eventually.
1092 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1094 phys_mem_alloc = alloc;
1097 static uint16_t phys_section_add(PhysPageMap *map,
1098 MemoryRegionSection *section)
1100 /* The physical section number is ORed with a page-aligned
1101 * pointer to produce the iotlb entries. Thus it should
1102 * never overflow into the page-aligned value.
1104 assert(map->sections_nb < TARGET_PAGE_SIZE);
1106 if (map->sections_nb == map->sections_nb_alloc) {
1107 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1108 map->sections = g_renew(MemoryRegionSection, map->sections,
1109 map->sections_nb_alloc);
1111 map->sections[map->sections_nb] = *section;
1112 memory_region_ref(section->mr);
1113 return map->sections_nb++;
1116 static void phys_section_destroy(MemoryRegion *mr)
1118 bool have_sub_page = mr->subpage;
1120 memory_region_unref(mr);
1122 if (have_sub_page) {
1123 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1124 object_unref(OBJECT(&subpage->iomem));
1125 g_free(subpage);
1129 static void phys_sections_free(PhysPageMap *map)
1131 while (map->sections_nb > 0) {
1132 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1133 phys_section_destroy(section->mr);
1135 g_free(map->sections);
1136 g_free(map->nodes);
1139 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1141 subpage_t *subpage;
1142 hwaddr base = section->offset_within_address_space
1143 & TARGET_PAGE_MASK;
1144 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1145 d->map.nodes, d->map.sections);
1146 MemoryRegionSection subsection = {
1147 .offset_within_address_space = base,
1148 .size = int128_make64(TARGET_PAGE_SIZE),
1150 hwaddr start, end;
1152 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1154 if (!(existing->mr->subpage)) {
1155 subpage = subpage_init(d->as, base);
1156 subsection.address_space = d->as;
1157 subsection.mr = &subpage->iomem;
1158 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1159 phys_section_add(&d->map, &subsection));
1160 } else {
1161 subpage = container_of(existing->mr, subpage_t, iomem);
1163 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1164 end = start + int128_get64(section->size) - 1;
1165 subpage_register(subpage, start, end,
1166 phys_section_add(&d->map, section));
1170 static void register_multipage(AddressSpaceDispatch *d,
1171 MemoryRegionSection *section)
1173 hwaddr start_addr = section->offset_within_address_space;
1174 uint16_t section_index = phys_section_add(&d->map, section);
1175 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1176 TARGET_PAGE_BITS));
1178 assert(num_pages);
1179 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1182 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1184 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1185 AddressSpaceDispatch *d = as->next_dispatch;
1186 MemoryRegionSection now = *section, remain = *section;
1187 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1189 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1190 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1191 - now.offset_within_address_space;
1193 now.size = int128_min(int128_make64(left), now.size);
1194 register_subpage(d, &now);
1195 } else {
1196 now.size = int128_zero();
1198 while (int128_ne(remain.size, now.size)) {
1199 remain.size = int128_sub(remain.size, now.size);
1200 remain.offset_within_address_space += int128_get64(now.size);
1201 remain.offset_within_region += int128_get64(now.size);
1202 now = remain;
1203 if (int128_lt(remain.size, page_size)) {
1204 register_subpage(d, &now);
1205 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1206 now.size = page_size;
1207 register_subpage(d, &now);
1208 } else {
1209 now.size = int128_and(now.size, int128_neg(page_size));
1210 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; a no-op when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1221 void qemu_mutex_lock_ramlist(void)
1223 qemu_mutex_lock(&ram_list.mutex);
1226 void qemu_mutex_unlock_ramlist(void)
1228 qemu_mutex_unlock(&ram_list.mutex);
1231 #ifdef __linux__
1232 static void *file_ram_alloc(RAMBlock *block,
1233 ram_addr_t memory,
1234 const char *path,
1235 Error **errp)
1237 bool unlink_on_error = false;
1238 char *filename;
1239 char *sanitized_name;
1240 char *c;
1241 void *area;
1242 int fd = -1;
1243 int64_t page_size;
1245 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1246 error_setg(errp,
1247 "host lacks kvm mmu notifiers, -mem-path unsupported");
1248 return NULL;
1251 for (;;) {
1252 fd = open(path, O_RDWR);
1253 if (fd >= 0) {
1254 /* @path names an existing file, use it */
1255 break;
1257 if (errno == ENOENT) {
1258 /* @path names a file that doesn't exist, create it */
1259 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1260 if (fd >= 0) {
1261 unlink_on_error = true;
1262 break;
1264 } else if (errno == EISDIR) {
1265 /* @path names a directory, create a file there */
1266 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1267 sanitized_name = g_strdup(memory_region_name(block->mr));
1268 for (c = sanitized_name; *c != '\0'; c++) {
1269 if (*c == '/') {
1270 *c = '_';
1274 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1275 sanitized_name);
1276 g_free(sanitized_name);
1278 fd = mkstemp(filename);
1279 if (fd >= 0) {
1280 unlink(filename);
1281 g_free(filename);
1282 break;
1284 g_free(filename);
1286 if (errno != EEXIST && errno != EINTR) {
1287 error_setg_errno(errp, errno,
1288 "can't open backing store %s for guest RAM",
1289 path);
1290 goto error;
1293 * Try again on EINTR and EEXIST. The latter happens when
1294 * something else creates the file between our two open().
1298 page_size = qemu_fd_getpagesize(fd);
1299 block->mr->align = page_size;
1301 if (memory < page_size) {
1302 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1303 "or larger than page size 0x%" PRIx64,
1304 memory, page_size);
1305 goto error;
1308 memory = ROUND_UP(memory, page_size);
1311 * ftruncate is not supported by hugetlbfs in older
1312 * hosts, so don't bother bailing out on errors.
1313 * If anything goes wrong with it under other filesystems,
1314 * mmap will fail.
1316 if (ftruncate(fd, memory)) {
1317 perror("ftruncate");
1320 area = qemu_ram_mmap(fd, memory, page_size, block->flags & RAM_SHARED);
1321 if (area == MAP_FAILED) {
1322 error_setg_errno(errp, errno,
1323 "unable to map backing store for guest RAM");
1324 goto error;
1327 if (mem_prealloc) {
1328 os_mem_prealloc(fd, area, memory);
1331 block->fd = fd;
1332 return area;
1334 error:
1335 if (unlink_on_error) {
1336 unlink(path);
1338 if (fd != -1) {
1339 close(fd);
1341 return NULL;
1343 #endif
1345 /* Called with the ramlist lock held. */
1346 static ram_addr_t find_ram_offset(ram_addr_t size)
1348 RAMBlock *block, *next_block;
1349 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1351 assert(size != 0); /* it would hand out same offset multiple times */
1353 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1354 return 0;
1357 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1358 ram_addr_t end, next = RAM_ADDR_MAX;
1360 end = block->offset + block->max_length;
1362 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1363 if (next_block->offset >= end) {
1364 next = MIN(next, next_block->offset);
1367 if (next - end >= size && next - end < mingap) {
1368 offset = end;
1369 mingap = next - end;
1373 if (offset == RAM_ADDR_MAX) {
1374 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1375 (uint64_t)size);
1376 abort();
1379 return offset;
1382 ram_addr_t last_ram_offset(void)
1384 RAMBlock *block;
1385 ram_addr_t last = 0;
1387 rcu_read_lock();
1388 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1389 last = MAX(last, block->offset + block->max_length);
1391 rcu_read_unlock();
1392 return last;
1395 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1397 int ret;
1399 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1400 if (!machine_dump_guest_core(current_machine)) {
1401 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1402 if (ret) {
1403 perror("qemu_madvise");
1404 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1405 "but dump_guest_core=off specified\n");
1410 /* Called within an RCU critical section, or while the ramlist lock
1411 * is held.
1413 static RAMBlock *find_ram_block(ram_addr_t addr)
1415 RAMBlock *block;
1417 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1418 if (block->offset == addr) {
1419 return block;
1423 return NULL;
1426 const char *qemu_ram_get_idstr(RAMBlock *rb)
1428 return rb->idstr;
1431 /* Called with iothread lock held. */
1432 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1434 RAMBlock *new_block, *block;
1436 rcu_read_lock();
1437 new_block = find_ram_block(addr);
1438 assert(new_block);
1439 assert(!new_block->idstr[0]);
1441 if (dev) {
1442 char *id = qdev_get_dev_path(dev);
1443 if (id) {
1444 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1445 g_free(id);
1448 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1450 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1451 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1452 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1453 new_block->idstr);
1454 abort();
1457 rcu_read_unlock();
1460 /* Called with iothread lock held. */
1461 void qemu_ram_unset_idstr(ram_addr_t addr)
1463 RAMBlock *block;
1465 /* FIXME: arch_init.c assumes that this is not called throughout
1466 * migration. Ignore the problem since hot-unplug during migration
1467 * does not work anyway.
1470 rcu_read_lock();
1471 block = find_ram_block(addr);
1472 if (block) {
1473 memset(block->idstr, 0, sizeof(block->idstr));
1475 rcu_read_unlock();
1478 static int memory_try_enable_merging(void *addr, size_t len)
1480 if (!machine_mem_merge(current_machine)) {
1481 /* disabled by the user */
1482 return 0;
1485 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1488 /* Only legal before guest might have detected the memory size: e.g. on
1489 * incoming migration, or right after reset.
1491 * As memory core doesn't know how is memory accessed, it is up to
1492 * resize callback to update device state and/or add assertions to detect
1493 * misuse, if necessary.
1495 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1497 RAMBlock *block = find_ram_block(base);
1499 assert(block);
1501 newsize = HOST_PAGE_ALIGN(newsize);
1503 if (block->used_length == newsize) {
1504 return 0;
1507 if (!(block->flags & RAM_RESIZEABLE)) {
1508 error_setg_errno(errp, EINVAL,
1509 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1510 " in != 0x" RAM_ADDR_FMT, block->idstr,
1511 newsize, block->used_length);
1512 return -EINVAL;
1515 if (block->max_length < newsize) {
1516 error_setg_errno(errp, EINVAL,
1517 "Length too large: %s: 0x" RAM_ADDR_FMT
1518 " > 0x" RAM_ADDR_FMT, block->idstr,
1519 newsize, block->max_length);
1520 return -EINVAL;
1523 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1524 block->used_length = newsize;
1525 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1526 DIRTY_CLIENTS_ALL);
1527 memory_region_set_size(block->mr, newsize);
1528 if (block->resized) {
1529 block->resized(block->idstr, newsize, block->host);
1531 return 0;
1534 /* Called with ram_list.mutex held */
1535 static void dirty_memory_extend(ram_addr_t old_ram_size,
1536 ram_addr_t new_ram_size)
1538 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1539 DIRTY_MEMORY_BLOCK_SIZE);
1540 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1541 DIRTY_MEMORY_BLOCK_SIZE);
1542 int i;
1544 /* Only need to extend if block count increased */
1545 if (new_num_blocks <= old_num_blocks) {
1546 return;
1549 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1550 DirtyMemoryBlocks *old_blocks;
1551 DirtyMemoryBlocks *new_blocks;
1552 int j;
1554 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1555 new_blocks = g_malloc(sizeof(*new_blocks) +
1556 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1558 if (old_num_blocks) {
1559 memcpy(new_blocks->blocks, old_blocks->blocks,
1560 old_num_blocks * sizeof(old_blocks->blocks[0]));
1563 for (j = old_num_blocks; j < new_num_blocks; j++) {
1564 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1567 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1569 if (old_blocks) {
1570 g_free_rcu(old_blocks, rcu);
1575 static void ram_block_add(RAMBlock *new_block, Error **errp)
1577 RAMBlock *block;
1578 RAMBlock *last_block = NULL;
1579 ram_addr_t old_ram_size, new_ram_size;
1580 Error *err = NULL;
1582 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1584 qemu_mutex_lock_ramlist();
1585 new_block->offset = find_ram_offset(new_block->max_length);
1587 if (!new_block->host) {
1588 if (xen_enabled()) {
1589 xen_ram_alloc(new_block->offset, new_block->max_length,
1590 new_block->mr, &err);
1591 if (err) {
1592 error_propagate(errp, err);
1593 qemu_mutex_unlock_ramlist();
1594 return;
1596 } else {
1597 new_block->host = phys_mem_alloc(new_block->max_length,
1598 &new_block->mr->align);
1599 if (!new_block->host) {
1600 error_setg_errno(errp, errno,
1601 "cannot set up guest memory '%s'",
1602 memory_region_name(new_block->mr));
1603 qemu_mutex_unlock_ramlist();
1604 return;
1606 memory_try_enable_merging(new_block->host, new_block->max_length);
1610 new_ram_size = MAX(old_ram_size,
1611 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1612 if (new_ram_size > old_ram_size) {
1613 migration_bitmap_extend(old_ram_size, new_ram_size);
1614 dirty_memory_extend(old_ram_size, new_ram_size);
1616 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1617 * QLIST (which has an RCU-friendly variant) does not have insertion at
1618 * tail, so save the last element in last_block.
1620 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1621 last_block = block;
1622 if (block->max_length < new_block->max_length) {
1623 break;
1626 if (block) {
1627 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1628 } else if (last_block) {
1629 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1630 } else { /* list is empty */
1631 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1633 ram_list.mru_block = NULL;
1635 /* Write list before version */
1636 smp_wmb();
1637 ram_list.version++;
1638 qemu_mutex_unlock_ramlist();
1640 cpu_physical_memory_set_dirty_range(new_block->offset,
1641 new_block->used_length,
1642 DIRTY_CLIENTS_ALL);
1644 if (new_block->host) {
1645 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1646 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1647 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1648 if (kvm_enabled()) {
1649 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1654 #ifdef __linux__
1655 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1656 bool share, const char *mem_path,
1657 Error **errp)
1659 RAMBlock *new_block;
1660 Error *local_err = NULL;
1662 if (xen_enabled()) {
1663 error_setg(errp, "-mem-path not supported with Xen");
1664 return NULL;
1667 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1669 * file_ram_alloc() needs to allocate just like
1670 * phys_mem_alloc, but we haven't bothered to provide
1671 * a hook there.
1673 error_setg(errp,
1674 "-mem-path not supported with this accelerator");
1675 return NULL;
1678 size = HOST_PAGE_ALIGN(size);
1679 new_block = g_malloc0(sizeof(*new_block));
1680 new_block->mr = mr;
1681 new_block->used_length = size;
1682 new_block->max_length = size;
1683 new_block->flags = share ? RAM_SHARED : 0;
1684 new_block->host = file_ram_alloc(new_block, size,
1685 mem_path, errp);
1686 if (!new_block->host) {
1687 g_free(new_block);
1688 return NULL;
1691 ram_block_add(new_block, &local_err);
1692 if (local_err) {
1693 g_free(new_block);
1694 error_propagate(errp, local_err);
1695 return NULL;
1697 return new_block;
1699 #endif
1701 static
1702 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1703 void (*resized)(const char*,
1704 uint64_t length,
1705 void *host),
1706 void *host, bool resizeable,
1707 MemoryRegion *mr, Error **errp)
1709 RAMBlock *new_block;
1710 Error *local_err = NULL;
1712 size = HOST_PAGE_ALIGN(size);
1713 max_size = HOST_PAGE_ALIGN(max_size);
1714 new_block = g_malloc0(sizeof(*new_block));
1715 new_block->mr = mr;
1716 new_block->resized = resized;
1717 new_block->used_length = size;
1718 new_block->max_length = max_size;
1719 assert(max_size >= size);
1720 new_block->fd = -1;
1721 new_block->host = host;
1722 if (host) {
1723 new_block->flags |= RAM_PREALLOC;
1725 if (resizeable) {
1726 new_block->flags |= RAM_RESIZEABLE;
1728 ram_block_add(new_block, &local_err);
1729 if (local_err) {
1730 g_free(new_block);
1731 error_propagate(errp, local_err);
1732 return NULL;
1734 return new_block;
1737 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1738 MemoryRegion *mr, Error **errp)
1740 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1743 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1745 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1748 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1749 void (*resized)(const char*,
1750 uint64_t length,
1751 void *host),
1752 MemoryRegion *mr, Error **errp)
1754 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1757 static void reclaim_ramblock(RAMBlock *block)
1759 if (block->flags & RAM_PREALLOC) {
1761 } else if (xen_enabled()) {
1762 xen_invalidate_map_cache_entry(block->host);
1763 #ifndef _WIN32
1764 } else if (block->fd >= 0) {
1765 qemu_ram_munmap(block->host, block->max_length);
1766 close(block->fd);
1767 #endif
1768 } else {
1769 qemu_anon_ram_free(block->host, block->max_length);
1771 g_free(block);
1774 void qemu_ram_free(RAMBlock *block)
1776 if (!block) {
1777 return;
1780 qemu_mutex_lock_ramlist();
1781 QLIST_REMOVE_RCU(block, next);
1782 ram_list.mru_block = NULL;
1783 /* Write list before version */
1784 smp_wmb();
1785 ram_list.version++;
1786 call_rcu(block, reclaim_ramblock, rcu);
1787 qemu_mutex_unlock_ramlist();
1790 #ifndef _WIN32
1791 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1793 RAMBlock *block;
1794 ram_addr_t offset;
1795 int flags;
1796 void *area, *vaddr;
1798 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1799 offset = addr - block->offset;
1800 if (offset < block->max_length) {
1801 vaddr = ramblock_ptr(block, offset);
1802 if (block->flags & RAM_PREALLOC) {
1804 } else if (xen_enabled()) {
1805 abort();
1806 } else {
1807 flags = MAP_FIXED;
1808 if (block->fd >= 0) {
1809 flags |= (block->flags & RAM_SHARED ?
1810 MAP_SHARED : MAP_PRIVATE);
1811 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1812 flags, block->fd, offset);
1813 } else {
1815 * Remap needs to match alloc. Accelerators that
1816 * set phys_mem_alloc never remap. If they did,
1817 * we'd need a remap hook here.
1819 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1821 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1822 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1823 flags, -1, 0);
1825 if (area != vaddr) {
1826 fprintf(stderr, "Could not remap addr: "
1827 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1828 length, addr);
1829 exit(1);
1831 memory_try_enable_merging(vaddr, length);
1832 qemu_ram_setup_dump(vaddr, length);
1837 #endif /* !_WIN32 */
1839 int qemu_get_ram_fd(ram_addr_t addr)
1841 RAMBlock *block;
1842 int fd;
1844 rcu_read_lock();
1845 block = qemu_get_ram_block(addr);
1846 fd = block->fd;
1847 rcu_read_unlock();
1848 return fd;
1851 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1853 RAMBlock *block;
1855 rcu_read_lock();
1856 block = qemu_get_ram_block(addr);
1857 block->fd = fd;
1858 rcu_read_unlock();
1861 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1863 RAMBlock *block;
1864 void *ptr;
1866 rcu_read_lock();
1867 block = qemu_get_ram_block(addr);
1868 ptr = ramblock_ptr(block, 0);
1869 rcu_read_unlock();
1870 return ptr;
1873 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1874 * This should not be used for general purpose DMA. Use address_space_map
1875 * or address_space_rw instead. For local memory (e.g. video ram) that the
1876 * device owns, use memory_region_get_ram_ptr.
1878 * Called within RCU critical section.
1880 void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1882 RAMBlock *block = ram_block;
1884 if (block == NULL) {
1885 block = qemu_get_ram_block(addr);
1888 if (xen_enabled() && block->host == NULL) {
1889 /* We need to check if the requested address is in the RAM
1890 * because we don't want to map the entire memory in QEMU.
1891 * In that case just map until the end of the page.
1893 if (block->offset == 0) {
1894 return xen_map_cache(addr, 0, 0);
1897 block->host = xen_map_cache(block->offset, block->max_length, 1);
1899 return ramblock_ptr(block, addr - block->offset);
1902 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1903 * but takes a size argument.
1905 * Called within RCU critical section.
1907 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1908 hwaddr *size)
1910 RAMBlock *block = ram_block;
1911 ram_addr_t offset_inside_block;
1912 if (*size == 0) {
1913 return NULL;
1916 if (block == NULL) {
1917 block = qemu_get_ram_block(addr);
1919 offset_inside_block = addr - block->offset;
1920 *size = MIN(*size, block->max_length - offset_inside_block);
1922 if (xen_enabled() && block->host == NULL) {
1923 /* We need to check if the requested address is in the RAM
1924 * because we don't want to map the entire memory in QEMU.
1925 * In that case just map the requested area.
1927 if (block->offset == 0) {
1928 return xen_map_cache(addr, *size, 1);
1931 block->host = xen_map_cache(block->offset, block->max_length, 1);
1934 return ramblock_ptr(block, offset_inside_block);
1938 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1939 * in that RAMBlock.
1941 * ptr: Host pointer to look up
1942 * round_offset: If true round the result offset down to a page boundary
1943 * *ram_addr: set to result ram_addr
1944 * *offset: set to result offset within the RAMBlock
1946 * Returns: RAMBlock (or NULL if not found)
1948 * By the time this function returns, the returned pointer is not protected
1949 * by RCU anymore. If the caller is not within an RCU critical section and
1950 * does not hold the iothread lock, it must have other means of protecting the
1951 * pointer, such as a reference to the region that includes the incoming
1952 * ram_addr_t.
1954 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1955 ram_addr_t *ram_addr,
1956 ram_addr_t *offset)
1958 RAMBlock *block;
1959 uint8_t *host = ptr;
1961 if (xen_enabled()) {
1962 rcu_read_lock();
1963 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1964 block = qemu_get_ram_block(*ram_addr);
1965 if (block) {
1966 *offset = (host - block->host);
1968 rcu_read_unlock();
1969 return block;
1972 rcu_read_lock();
1973 block = atomic_rcu_read(&ram_list.mru_block);
1974 if (block && block->host && host - block->host < block->max_length) {
1975 goto found;
1978 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1979 /* This case append when the block is not mapped. */
1980 if (block->host == NULL) {
1981 continue;
1983 if (host - block->host < block->max_length) {
1984 goto found;
1988 rcu_read_unlock();
1989 return NULL;
1991 found:
1992 *offset = (host - block->host);
1993 if (round_offset) {
1994 *offset &= TARGET_PAGE_MASK;
1996 *ram_addr = block->offset + *offset;
1997 rcu_read_unlock();
1998 return block;
2002 * Finds the named RAMBlock
2004 * name: The name of RAMBlock to find
2006 * Returns: RAMBlock (or NULL if not found)
2008 RAMBlock *qemu_ram_block_by_name(const char *name)
2010 RAMBlock *block;
2012 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2013 if (!strcmp(name, block->idstr)) {
2014 return block;
2018 return NULL;
2021 /* Some of the softmmu routines need to translate from a host pointer
2022 (typically a TLB entry) back to a ram offset. */
2023 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2025 RAMBlock *block;
2026 ram_addr_t offset; /* Not used */
2028 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2030 if (!block) {
2031 return NULL;
2034 return block->mr;
2037 /* Called within RCU critical section. */
2038 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2039 uint64_t val, unsigned size)
2041 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2042 tb_invalidate_phys_page_fast(ram_addr, size);
2044 switch (size) {
2045 case 1:
2046 stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2047 break;
2048 case 2:
2049 stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2050 break;
2051 case 4:
2052 stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2053 break;
2054 default:
2055 abort();
2057 /* Set both VGA and migration bits for simplicity and to remove
2058 * the notdirty callback faster.
2060 cpu_physical_memory_set_dirty_range(ram_addr, size,
2061 DIRTY_CLIENTS_NOCODE);
2062 /* we remove the notdirty callback only if the code has been
2063 flushed */
2064 if (!cpu_physical_memory_is_clean(ram_addr)) {
2065 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2069 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2070 unsigned size, bool is_write)
2072 return is_write;
2075 static const MemoryRegionOps notdirty_mem_ops = {
2076 .write = notdirty_mem_write,
2077 .valid.accepts = notdirty_mem_accepts,
2078 .endianness = DEVICE_NATIVE_ENDIAN,
2081 /* Generate a debug exception if a watchpoint has been hit. */
2082 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2084 CPUState *cpu = current_cpu;
2085 CPUClass *cc = CPU_GET_CLASS(cpu);
2086 CPUArchState *env = cpu->env_ptr;
2087 target_ulong pc, cs_base;
2088 target_ulong vaddr;
2089 CPUWatchpoint *wp;
2090 uint32_t cpu_flags;
2092 if (cpu->watchpoint_hit) {
2093 /* We re-entered the check after replacing the TB. Now raise
2094 * the debug interrupt so that is will trigger after the
2095 * current instruction. */
2096 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2097 return;
2099 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2100 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2101 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2102 && (wp->flags & flags)) {
2103 if (flags == BP_MEM_READ) {
2104 wp->flags |= BP_WATCHPOINT_HIT_READ;
2105 } else {
2106 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2108 wp->hitaddr = vaddr;
2109 wp->hitattrs = attrs;
2110 if (!cpu->watchpoint_hit) {
2111 if (wp->flags & BP_CPU &&
2112 !cc->debug_check_watchpoint(cpu, wp)) {
2113 wp->flags &= ~BP_WATCHPOINT_HIT;
2114 continue;
2116 cpu->watchpoint_hit = wp;
2117 tb_check_watchpoint(cpu);
2118 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2119 cpu->exception_index = EXCP_DEBUG;
2120 cpu_loop_exit(cpu);
2121 } else {
2122 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2123 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2124 cpu_resume_from_signal(cpu, NULL);
2127 } else {
2128 wp->flags &= ~BP_WATCHPOINT_HIT;
2133 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2134 so these check for a hit then pass through to the normal out-of-line
2135 phys routines. */
2136 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2137 unsigned size, MemTxAttrs attrs)
2139 MemTxResult res;
2140 uint64_t data;
2141 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2142 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2144 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2145 switch (size) {
2146 case 1:
2147 data = address_space_ldub(as, addr, attrs, &res);
2148 break;
2149 case 2:
2150 data = address_space_lduw(as, addr, attrs, &res);
2151 break;
2152 case 4:
2153 data = address_space_ldl(as, addr, attrs, &res);
2154 break;
2155 default: abort();
2157 *pdata = data;
2158 return res;
2161 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2162 uint64_t val, unsigned size,
2163 MemTxAttrs attrs)
2165 MemTxResult res;
2166 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2167 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2169 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2170 switch (size) {
2171 case 1:
2172 address_space_stb(as, addr, val, attrs, &res);
2173 break;
2174 case 2:
2175 address_space_stw(as, addr, val, attrs, &res);
2176 break;
2177 case 4:
2178 address_space_stl(as, addr, val, attrs, &res);
2179 break;
2180 default: abort();
2182 return res;
2185 static const MemoryRegionOps watch_mem_ops = {
2186 .read_with_attrs = watch_mem_read,
2187 .write_with_attrs = watch_mem_write,
2188 .endianness = DEVICE_NATIVE_ENDIAN,
2191 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2192 unsigned len, MemTxAttrs attrs)
2194 subpage_t *subpage = opaque;
2195 uint8_t buf[8];
2196 MemTxResult res;
2198 #if defined(DEBUG_SUBPAGE)
2199 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2200 subpage, len, addr);
2201 #endif
2202 res = address_space_read(subpage->as, addr + subpage->base,
2203 attrs, buf, len);
2204 if (res) {
2205 return res;
2207 switch (len) {
2208 case 1:
2209 *data = ldub_p(buf);
2210 return MEMTX_OK;
2211 case 2:
2212 *data = lduw_p(buf);
2213 return MEMTX_OK;
2214 case 4:
2215 *data = ldl_p(buf);
2216 return MEMTX_OK;
2217 case 8:
2218 *data = ldq_p(buf);
2219 return MEMTX_OK;
2220 default:
2221 abort();
2225 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2226 uint64_t value, unsigned len, MemTxAttrs attrs)
2228 subpage_t *subpage = opaque;
2229 uint8_t buf[8];
2231 #if defined(DEBUG_SUBPAGE)
2232 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2233 " value %"PRIx64"\n",
2234 __func__, subpage, len, addr, value);
2235 #endif
2236 switch (len) {
2237 case 1:
2238 stb_p(buf, value);
2239 break;
2240 case 2:
2241 stw_p(buf, value);
2242 break;
2243 case 4:
2244 stl_p(buf, value);
2245 break;
2246 case 8:
2247 stq_p(buf, value);
2248 break;
2249 default:
2250 abort();
2252 return address_space_write(subpage->as, addr + subpage->base,
2253 attrs, buf, len);
2256 static bool subpage_accepts(void *opaque, hwaddr addr,
2257 unsigned len, bool is_write)
2259 subpage_t *subpage = opaque;
2260 #if defined(DEBUG_SUBPAGE)
2261 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2262 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2263 #endif
2265 return address_space_access_valid(subpage->as, addr + subpage->base,
2266 len, is_write);
2269 static const MemoryRegionOps subpage_ops = {
2270 .read_with_attrs = subpage_read,
2271 .write_with_attrs = subpage_write,
2272 .impl.min_access_size = 1,
2273 .impl.max_access_size = 8,
2274 .valid.min_access_size = 1,
2275 .valid.max_access_size = 8,
2276 .valid.accepts = subpage_accepts,
2277 .endianness = DEVICE_NATIVE_ENDIAN,
2280 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2281 uint16_t section)
2283 int idx, eidx;
2285 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2286 return -1;
2287 idx = SUBPAGE_IDX(start);
2288 eidx = SUBPAGE_IDX(end);
2289 #if defined(DEBUG_SUBPAGE)
2290 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2291 __func__, mmio, start, end, idx, eidx, section);
2292 #endif
2293 for (; idx <= eidx; idx++) {
2294 mmio->sub_section[idx] = section;
2297 return 0;
2300 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2302 subpage_t *mmio;
2304 mmio = g_malloc0(sizeof(subpage_t));
2306 mmio->as = as;
2307 mmio->base = base;
2308 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2309 NULL, TARGET_PAGE_SIZE);
2310 mmio->iomem.subpage = true;
2311 #if defined(DEBUG_SUBPAGE)
2312 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2313 mmio, base, TARGET_PAGE_SIZE);
2314 #endif
2315 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2317 return mmio;
2320 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2321 MemoryRegion *mr)
2323 assert(as);
2324 MemoryRegionSection section = {
2325 .address_space = as,
2326 .mr = mr,
2327 .offset_within_address_space = 0,
2328 .offset_within_region = 0,
2329 .size = int128_2_64(),
2332 return phys_section_add(map, &section);
2335 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2337 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2338 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2339 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2340 MemoryRegionSection *sections = d->map.sections;
2342 return sections[index & ~TARGET_PAGE_MASK].mr;
2345 static void io_mem_init(void)
2347 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2348 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2349 NULL, UINT64_MAX);
2350 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2351 NULL, UINT64_MAX);
2352 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2353 NULL, UINT64_MAX);
2356 static void mem_begin(MemoryListener *listener)
2358 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2359 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2360 uint16_t n;
2362 n = dummy_section(&d->map, as, &io_mem_unassigned);
2363 assert(n == PHYS_SECTION_UNASSIGNED);
2364 n = dummy_section(&d->map, as, &io_mem_notdirty);
2365 assert(n == PHYS_SECTION_NOTDIRTY);
2366 n = dummy_section(&d->map, as, &io_mem_rom);
2367 assert(n == PHYS_SECTION_ROM);
2368 n = dummy_section(&d->map, as, &io_mem_watch);
2369 assert(n == PHYS_SECTION_WATCH);
2371 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2372 d->as = as;
2373 as->next_dispatch = d;
2376 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2378 phys_sections_free(&d->map);
2379 g_free(d);
2382 static void mem_commit(MemoryListener *listener)
2384 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2385 AddressSpaceDispatch *cur = as->dispatch;
2386 AddressSpaceDispatch *next = as->next_dispatch;
2388 phys_page_compact_all(next, next->map.nodes_nb);
2390 atomic_rcu_set(&as->dispatch, next);
2391 if (cur) {
2392 call_rcu(cur, address_space_dispatch_free, rcu);
2396 static void tcg_commit(MemoryListener *listener)
2398 CPUAddressSpace *cpuas;
2399 AddressSpaceDispatch *d;
2401 /* since each CPU stores ram addresses in its TLB cache, we must
2402 reset the modified entries */
2403 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2404 cpu_reloading_memory_map();
2405 /* The CPU and TLB are protected by the iothread lock.
2406 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2407 * may have split the RCU critical section.
2409 d = atomic_rcu_read(&cpuas->as->dispatch);
2410 cpuas->memory_dispatch = d;
2411 tlb_flush(cpuas->cpu, 1);
2414 void address_space_init_dispatch(AddressSpace *as)
2416 as->dispatch = NULL;
2417 as->dispatch_listener = (MemoryListener) {
2418 .begin = mem_begin,
2419 .commit = mem_commit,
2420 .region_add = mem_add,
2421 .region_nop = mem_add,
2422 .priority = 0,
2424 memory_listener_register(&as->dispatch_listener, as);
2427 void address_space_unregister(AddressSpace *as)
2429 memory_listener_unregister(&as->dispatch_listener);
2432 void address_space_destroy_dispatch(AddressSpace *as)
2434 AddressSpaceDispatch *d = as->dispatch;
2436 atomic_rcu_set(&as->dispatch, NULL);
2437 if (d) {
2438 call_rcu(d, address_space_dispatch_free, rcu);
2442 static void memory_map_init(void)
2444 system_memory = g_malloc(sizeof(*system_memory));
2446 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2447 address_space_init(&address_space_memory, system_memory, "memory");
2449 system_io = g_malloc(sizeof(*system_io));
2450 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2451 65536);
2452 address_space_init(&address_space_io, system_io, "I/O");
2455 MemoryRegion *get_system_memory(void)
2457 return system_memory;
2460 MemoryRegion *get_system_io(void)
2462 return system_io;
2465 #endif /* !defined(CONFIG_USER_ONLY) */
2467 /* physical memory access (slow version, mainly for debug) */
2468 #if defined(CONFIG_USER_ONLY)
2469 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2470 uint8_t *buf, int len, int is_write)
2472 int l, flags;
2473 target_ulong page;
2474 void * p;
2476 while (len > 0) {
2477 page = addr & TARGET_PAGE_MASK;
2478 l = (page + TARGET_PAGE_SIZE) - addr;
2479 if (l > len)
2480 l = len;
2481 flags = page_get_flags(page);
2482 if (!(flags & PAGE_VALID))
2483 return -1;
2484 if (is_write) {
2485 if (!(flags & PAGE_WRITE))
2486 return -1;
2487 /* XXX: this code should not depend on lock_user */
2488 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2489 return -1;
2490 memcpy(p, buf, l);
2491 unlock_user(p, addr, l);
2492 } else {
2493 if (!(flags & PAGE_READ))
2494 return -1;
2495 /* XXX: this code should not depend on lock_user */
2496 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2497 return -1;
2498 memcpy(buf, p, l);
2499 unlock_user(p, addr, 0);
2501 len -= l;
2502 buf += l;
2503 addr += l;
2505 return 0;
2508 #else
2510 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2511 hwaddr length)
2513 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2514 /* No early return if dirty_log_mask is or becomes 0, because
2515 * cpu_physical_memory_set_dirty_range will still call
2516 * xen_modified_memory.
2518 if (dirty_log_mask) {
2519 dirty_log_mask =
2520 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2522 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2523 tb_invalidate_phys_range(addr, addr + length);
2524 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2526 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2529 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2531 unsigned access_size_max = mr->ops->valid.max_access_size;
2533 /* Regions are assumed to support 1-4 byte accesses unless
2534 otherwise specified. */
2535 if (access_size_max == 0) {
2536 access_size_max = 4;
2539 /* Bound the maximum access by the alignment of the address. */
2540 if (!mr->ops->impl.unaligned) {
2541 unsigned align_size_max = addr & -addr;
2542 if (align_size_max != 0 && align_size_max < access_size_max) {
2543 access_size_max = align_size_max;
2547 /* Don't attempt accesses larger than the maximum. */
2548 if (l > access_size_max) {
2549 l = access_size_max;
2551 l = pow2floor(l);
2553 return l;
2556 static bool prepare_mmio_access(MemoryRegion *mr)
2558 bool unlocked = !qemu_mutex_iothread_locked();
2559 bool release_lock = false;
2561 if (unlocked && mr->global_locking) {
2562 qemu_mutex_lock_iothread();
2563 unlocked = false;
2564 release_lock = true;
2566 if (mr->flush_coalesced_mmio) {
2567 if (unlocked) {
2568 qemu_mutex_lock_iothread();
2570 qemu_flush_coalesced_mmio_buffer();
2571 if (unlocked) {
2572 qemu_mutex_unlock_iothread();
2576 return release_lock;
2579 /* Called within RCU critical section. */
2580 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2581 MemTxAttrs attrs,
2582 const uint8_t *buf,
2583 int len, hwaddr addr1,
2584 hwaddr l, MemoryRegion *mr)
2586 uint8_t *ptr;
2587 uint64_t val;
2588 MemTxResult result = MEMTX_OK;
2589 bool release_lock = false;
2591 for (;;) {
2592 if (!memory_access_is_direct(mr, true)) {
2593 release_lock |= prepare_mmio_access(mr);
2594 l = memory_access_size(mr, l, addr1);
2595 /* XXX: could force current_cpu to NULL to avoid
2596 potential bugs */
2597 switch (l) {
2598 case 8:
2599 /* 64 bit write access */
2600 val = ldq_p(buf);
2601 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2602 attrs);
2603 break;
2604 case 4:
2605 /* 32 bit write access */
2606 val = ldl_p(buf);
2607 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2608 attrs);
2609 break;
2610 case 2:
2611 /* 16 bit write access */
2612 val = lduw_p(buf);
2613 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2614 attrs);
2615 break;
2616 case 1:
2617 /* 8 bit write access */
2618 val = ldub_p(buf);
2619 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2620 attrs);
2621 break;
2622 default:
2623 abort();
2625 } else {
2626 addr1 += memory_region_get_ram_addr(mr);
2627 /* RAM case */
2628 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2629 memcpy(ptr, buf, l);
2630 invalidate_and_set_dirty(mr, addr1, l);
2633 if (release_lock) {
2634 qemu_mutex_unlock_iothread();
2635 release_lock = false;
2638 len -= l;
2639 buf += l;
2640 addr += l;
2642 if (!len) {
2643 break;
2646 l = len;
2647 mr = address_space_translate(as, addr, &addr1, &l, true);
2650 return result;
2653 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2654 const uint8_t *buf, int len)
2656 hwaddr l;
2657 hwaddr addr1;
2658 MemoryRegion *mr;
2659 MemTxResult result = MEMTX_OK;
2661 if (len > 0) {
2662 rcu_read_lock();
2663 l = len;
2664 mr = address_space_translate(as, addr, &addr1, &l, true);
2665 result = address_space_write_continue(as, addr, attrs, buf, len,
2666 addr1, l, mr);
2667 rcu_read_unlock();
2670 return result;
2673 /* Called within RCU critical section. */
2674 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2675 MemTxAttrs attrs, uint8_t *buf,
2676 int len, hwaddr addr1, hwaddr l,
2677 MemoryRegion *mr)
2679 uint8_t *ptr;
2680 uint64_t val;
2681 MemTxResult result = MEMTX_OK;
2682 bool release_lock = false;
2684 for (;;) {
2685 if (!memory_access_is_direct(mr, false)) {
2686 /* I/O case */
2687 release_lock |= prepare_mmio_access(mr);
2688 l = memory_access_size(mr, l, addr1);
2689 switch (l) {
2690 case 8:
2691 /* 64 bit read access */
2692 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2693 attrs);
2694 stq_p(buf, val);
2695 break;
2696 case 4:
2697 /* 32 bit read access */
2698 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2699 attrs);
2700 stl_p(buf, val);
2701 break;
2702 case 2:
2703 /* 16 bit read access */
2704 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2705 attrs);
2706 stw_p(buf, val);
2707 break;
2708 case 1:
2709 /* 8 bit read access */
2710 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2711 attrs);
2712 stb_p(buf, val);
2713 break;
2714 default:
2715 abort();
2717 } else {
2718 /* RAM case */
2719 ptr = qemu_get_ram_ptr(mr->ram_block,
2720 memory_region_get_ram_addr(mr) + addr1);
2721 memcpy(buf, ptr, l);
2724 if (release_lock) {
2725 qemu_mutex_unlock_iothread();
2726 release_lock = false;
2729 len -= l;
2730 buf += l;
2731 addr += l;
2733 if (!len) {
2734 break;
2737 l = len;
2738 mr = address_space_translate(as, addr, &addr1, &l, false);
2741 return result;
2744 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2745 MemTxAttrs attrs, uint8_t *buf, int len)
2747 hwaddr l;
2748 hwaddr addr1;
2749 MemoryRegion *mr;
2750 MemTxResult result = MEMTX_OK;
2752 if (len > 0) {
2753 rcu_read_lock();
2754 l = len;
2755 mr = address_space_translate(as, addr, &addr1, &l, false);
2756 result = address_space_read_continue(as, addr, attrs, buf, len,
2757 addr1, l, mr);
2758 rcu_read_unlock();
2761 return result;
2764 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2765 uint8_t *buf, int len, bool is_write)
2767 if (is_write) {
2768 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2769 } else {
2770 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2774 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2775 int len, int is_write)
2777 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2778 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into the RAM/ROM range */
    FLUSH_CACHE,    /* flush the host icache for the range; no copy */
};
2786 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2787 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2789 hwaddr l;
2790 uint8_t *ptr;
2791 hwaddr addr1;
2792 MemoryRegion *mr;
2794 rcu_read_lock();
2795 while (len > 0) {
2796 l = len;
2797 mr = address_space_translate(as, addr, &addr1, &l, true);
2799 if (!(memory_region_is_ram(mr) ||
2800 memory_region_is_romd(mr))) {
2801 l = memory_access_size(mr, l, addr1);
2802 } else {
2803 addr1 += memory_region_get_ram_addr(mr);
2804 /* ROM/RAM case */
2805 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2806 switch (type) {
2807 case WRITE_DATA:
2808 memcpy(ptr, buf, l);
2809 invalidate_and_set_dirty(mr, addr1, l);
2810 break;
2811 case FLUSH_CACHE:
2812 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2813 break;
2816 len -= l;
2817 buf += l;
2818 addr += l;
2820 rcu_read_unlock();
2823 /* used for ROM loading : can write in RAM and ROM */
2824 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2825 const uint8_t *buf, int len)
2827 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2830 void cpu_flush_icache_range(hwaddr start, int len)
2833 * This function should do the same thing as an icache flush that was
2834 * triggered from within the guest. For TCG we are always cache coherent,
2835 * so there is no need to flush anything. For KVM / Xen we need to flush
2836 * the host's instruction cache at least.
2838 if (tcg_enabled()) {
2839 return;
2842 cpu_physical_memory_write_rom_internal(&address_space_memory,
2843 start, NULL, len, FLUSH_CACHE);
2846 typedef struct {
2847 MemoryRegion *mr;
2848 void *buffer;
2849 hwaddr addr;
2850 hwaddr len;
2851 bool in_use;
2852 } BounceBuffer;
2854 static BounceBuffer bounce;
2856 typedef struct MapClient {
2857 QEMUBH *bh;
2858 QLIST_ENTRY(MapClient) link;
2859 } MapClient;
2861 QemuMutex map_client_list_lock;
2862 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2863 = QLIST_HEAD_INITIALIZER(map_client_list);
2865 static void cpu_unregister_map_client_do(MapClient *client)
2867 QLIST_REMOVE(client, link);
2868 g_free(client);
2871 static void cpu_notify_map_clients_locked(void)
2873 MapClient *client;
2875 while (!QLIST_EMPTY(&map_client_list)) {
2876 client = QLIST_FIRST(&map_client_list);
2877 qemu_bh_schedule(client->bh);
2878 cpu_unregister_map_client_do(client);
2882 void cpu_register_map_client(QEMUBH *bh)
2884 MapClient *client = g_malloc(sizeof(*client));
2886 qemu_mutex_lock(&map_client_list_lock);
2887 client->bh = bh;
2888 QLIST_INSERT_HEAD(&map_client_list, client, link);
2889 if (!atomic_read(&bounce.in_use)) {
2890 cpu_notify_map_clients_locked();
2892 qemu_mutex_unlock(&map_client_list_lock);
2895 void cpu_exec_init_all(void)
2897 qemu_mutex_init(&ram_list.mutex);
2898 io_mem_init();
2899 memory_map_init();
2900 qemu_mutex_init(&map_client_list_lock);
2903 void cpu_unregister_map_client(QEMUBH *bh)
2905 MapClient *client;
2907 qemu_mutex_lock(&map_client_list_lock);
2908 QLIST_FOREACH(client, &map_client_list, link) {
2909 if (client->bh == bh) {
2910 cpu_unregister_map_client_do(client);
2911 break;
2914 qemu_mutex_unlock(&map_client_list_lock);
2917 static void cpu_notify_map_clients(void)
2919 qemu_mutex_lock(&map_client_list_lock);
2920 cpu_notify_map_clients_locked();
2921 qemu_mutex_unlock(&map_client_list_lock);
2924 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2926 MemoryRegion *mr;
2927 hwaddr l, xlat;
2929 rcu_read_lock();
2930 while (len > 0) {
2931 l = len;
2932 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2933 if (!memory_access_is_direct(mr, is_write)) {
2934 l = memory_access_size(mr, l, addr);
2935 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2936 return false;
2940 len -= l;
2941 addr += l;
2943 rcu_read_unlock();
2944 return true;
2947 /* Map a physical memory region into a host virtual address.
2948 * May map a subset of the requested range, given by and returned in *plen.
2949 * May return NULL if resources needed to perform the mapping are exhausted.
2950 * Use only for reads OR writes - not for read-modify-write operations.
2951 * Use cpu_register_map_client() to know when retrying the map operation is
2952 * likely to succeed.
2954 void *address_space_map(AddressSpace *as,
2955 hwaddr addr,
2956 hwaddr *plen,
2957 bool is_write)
2959 hwaddr len = *plen;
2960 hwaddr done = 0;
2961 hwaddr l, xlat, base;
2962 MemoryRegion *mr, *this_mr;
2963 ram_addr_t raddr;
2964 void *ptr;
2966 if (len == 0) {
2967 return NULL;
2970 l = len;
2971 rcu_read_lock();
2972 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2974 if (!memory_access_is_direct(mr, is_write)) {
2975 if (atomic_xchg(&bounce.in_use, true)) {
2976 rcu_read_unlock();
2977 return NULL;
2979 /* Avoid unbounded allocations */
2980 l = MIN(l, TARGET_PAGE_SIZE);
2981 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2982 bounce.addr = addr;
2983 bounce.len = l;
2985 memory_region_ref(mr);
2986 bounce.mr = mr;
2987 if (!is_write) {
2988 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2989 bounce.buffer, l);
2992 rcu_read_unlock();
2993 *plen = l;
2994 return bounce.buffer;
2997 base = xlat;
2998 raddr = memory_region_get_ram_addr(mr);
3000 for (;;) {
3001 len -= l;
3002 addr += l;
3003 done += l;
3004 if (len == 0) {
3005 break;
3008 l = len;
3009 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3010 if (this_mr != mr || xlat != base + done) {
3011 break;
3015 memory_region_ref(mr);
3016 *plen = done;
3017 ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3018 rcu_read_unlock();
3020 return ptr;
3023 /* Unmaps a memory region previously mapped by address_space_map().
3024 * Will also mark the memory as dirty if is_write == 1. access_len gives
3025 * the amount of memory that was actually read or written by the caller.
3027 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3028 int is_write, hwaddr access_len)
3030 if (buffer != bounce.buffer) {
3031 MemoryRegion *mr;
3032 ram_addr_t addr1;
3034 mr = qemu_ram_addr_from_host(buffer, &addr1);
3035 assert(mr != NULL);
3036 if (is_write) {
3037 invalidate_and_set_dirty(mr, addr1, access_len);
3039 if (xen_enabled()) {
3040 xen_invalidate_map_cache_entry(buffer);
3042 memory_region_unref(mr);
3043 return;
3045 if (is_write) {
3046 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3047 bounce.buffer, access_len);
3049 qemu_vfree(bounce.buffer);
3050 bounce.buffer = NULL;
3051 memory_region_unref(bounce.mr);
3052 atomic_mb_set(&bounce.in_use, false);
3053 cpu_notify_map_clients();
3056 void *cpu_physical_memory_map(hwaddr addr,
3057 hwaddr *plen,
3058 int is_write)
3060 return address_space_map(&address_space_memory, addr, plen, is_write);
3063 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3064 int is_write, hwaddr access_len)
3066 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3069 /* warning: addr must be aligned */
3070 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3071 MemTxAttrs attrs,
3072 MemTxResult *result,
3073 enum device_endian endian)
3075 uint8_t *ptr;
3076 uint64_t val;
3077 MemoryRegion *mr;
3078 hwaddr l = 4;
3079 hwaddr addr1;
3080 MemTxResult r;
3081 bool release_lock = false;
3083 rcu_read_lock();
3084 mr = address_space_translate(as, addr, &addr1, &l, false);
3085 if (l < 4 || !memory_access_is_direct(mr, false)) {
3086 release_lock |= prepare_mmio_access(mr);
3088 /* I/O case */
3089 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3090 #if defined(TARGET_WORDS_BIGENDIAN)
3091 if (endian == DEVICE_LITTLE_ENDIAN) {
3092 val = bswap32(val);
3094 #else
3095 if (endian == DEVICE_BIG_ENDIAN) {
3096 val = bswap32(val);
3098 #endif
3099 } else {
3100 /* RAM case */
3101 ptr = qemu_get_ram_ptr(mr->ram_block,
3102 (memory_region_get_ram_addr(mr)
3103 & TARGET_PAGE_MASK)
3104 + addr1);
3105 switch (endian) {
3106 case DEVICE_LITTLE_ENDIAN:
3107 val = ldl_le_p(ptr);
3108 break;
3109 case DEVICE_BIG_ENDIAN:
3110 val = ldl_be_p(ptr);
3111 break;
3112 default:
3113 val = ldl_p(ptr);
3114 break;
3116 r = MEMTX_OK;
3118 if (result) {
3119 *result = r;
3121 if (release_lock) {
3122 qemu_mutex_unlock_iothread();
3124 rcu_read_unlock();
3125 return val;
3128 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3129 MemTxAttrs attrs, MemTxResult *result)
3131 return address_space_ldl_internal(as, addr, attrs, result,
3132 DEVICE_NATIVE_ENDIAN);
3135 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3136 MemTxAttrs attrs, MemTxResult *result)
3138 return address_space_ldl_internal(as, addr, attrs, result,
3139 DEVICE_LITTLE_ENDIAN);
3142 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3143 MemTxAttrs attrs, MemTxResult *result)
3145 return address_space_ldl_internal(as, addr, attrs, result,
3146 DEVICE_BIG_ENDIAN);
3149 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3151 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3154 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3156 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3159 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3161 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3164 /* warning: addr must be aligned */
3165 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3166 MemTxAttrs attrs,
3167 MemTxResult *result,
3168 enum device_endian endian)
3170 uint8_t *ptr;
3171 uint64_t val;
3172 MemoryRegion *mr;
3173 hwaddr l = 8;
3174 hwaddr addr1;
3175 MemTxResult r;
3176 bool release_lock = false;
3178 rcu_read_lock();
3179 mr = address_space_translate(as, addr, &addr1, &l,
3180 false);
3181 if (l < 8 || !memory_access_is_direct(mr, false)) {
3182 release_lock |= prepare_mmio_access(mr);
3184 /* I/O case */
3185 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3186 #if defined(TARGET_WORDS_BIGENDIAN)
3187 if (endian == DEVICE_LITTLE_ENDIAN) {
3188 val = bswap64(val);
3190 #else
3191 if (endian == DEVICE_BIG_ENDIAN) {
3192 val = bswap64(val);
3194 #endif
3195 } else {
3196 /* RAM case */
3197 ptr = qemu_get_ram_ptr(mr->ram_block,
3198 (memory_region_get_ram_addr(mr)
3199 & TARGET_PAGE_MASK)
3200 + addr1);
3201 switch (endian) {
3202 case DEVICE_LITTLE_ENDIAN:
3203 val = ldq_le_p(ptr);
3204 break;
3205 case DEVICE_BIG_ENDIAN:
3206 val = ldq_be_p(ptr);
3207 break;
3208 default:
3209 val = ldq_p(ptr);
3210 break;
3212 r = MEMTX_OK;
3214 if (result) {
3215 *result = r;
3217 if (release_lock) {
3218 qemu_mutex_unlock_iothread();
3220 rcu_read_unlock();
3221 return val;
3224 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3225 MemTxAttrs attrs, MemTxResult *result)
3227 return address_space_ldq_internal(as, addr, attrs, result,
3228 DEVICE_NATIVE_ENDIAN);
3231 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3232 MemTxAttrs attrs, MemTxResult *result)
3234 return address_space_ldq_internal(as, addr, attrs, result,
3235 DEVICE_LITTLE_ENDIAN);
3238 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3239 MemTxAttrs attrs, MemTxResult *result)
3241 return address_space_ldq_internal(as, addr, attrs, result,
3242 DEVICE_BIG_ENDIAN);
3245 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3247 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3250 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3252 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3255 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3257 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3260 /* XXX: optimize */
3261 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3262 MemTxAttrs attrs, MemTxResult *result)
3264 uint8_t val;
3265 MemTxResult r;
3267 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3268 if (result) {
3269 *result = r;
3271 return val;
3274 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3276 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3279 /* warning: addr must be aligned */
3280 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3281 hwaddr addr,
3282 MemTxAttrs attrs,
3283 MemTxResult *result,
3284 enum device_endian endian)
3286 uint8_t *ptr;
3287 uint64_t val;
3288 MemoryRegion *mr;
3289 hwaddr l = 2;
3290 hwaddr addr1;
3291 MemTxResult r;
3292 bool release_lock = false;
3294 rcu_read_lock();
3295 mr = address_space_translate(as, addr, &addr1, &l,
3296 false);
3297 if (l < 2 || !memory_access_is_direct(mr, false)) {
3298 release_lock |= prepare_mmio_access(mr);
3300 /* I/O case */
3301 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3302 #if defined(TARGET_WORDS_BIGENDIAN)
3303 if (endian == DEVICE_LITTLE_ENDIAN) {
3304 val = bswap16(val);
3306 #else
3307 if (endian == DEVICE_BIG_ENDIAN) {
3308 val = bswap16(val);
3310 #endif
3311 } else {
3312 /* RAM case */
3313 ptr = qemu_get_ram_ptr(mr->ram_block,
3314 (memory_region_get_ram_addr(mr)
3315 & TARGET_PAGE_MASK)
3316 + addr1);
3317 switch (endian) {
3318 case DEVICE_LITTLE_ENDIAN:
3319 val = lduw_le_p(ptr);
3320 break;
3321 case DEVICE_BIG_ENDIAN:
3322 val = lduw_be_p(ptr);
3323 break;
3324 default:
3325 val = lduw_p(ptr);
3326 break;
3328 r = MEMTX_OK;
3330 if (result) {
3331 *result = r;
3333 if (release_lock) {
3334 qemu_mutex_unlock_iothread();
3336 rcu_read_unlock();
3337 return val;
3340 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3341 MemTxAttrs attrs, MemTxResult *result)
3343 return address_space_lduw_internal(as, addr, attrs, result,
3344 DEVICE_NATIVE_ENDIAN);
3347 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3348 MemTxAttrs attrs, MemTxResult *result)
3350 return address_space_lduw_internal(as, addr, attrs, result,
3351 DEVICE_LITTLE_ENDIAN);
3354 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3355 MemTxAttrs attrs, MemTxResult *result)
3357 return address_space_lduw_internal(as, addr, attrs, result,
3358 DEVICE_BIG_ENDIAN);
3361 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3363 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3366 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3368 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3371 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3373 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3376 /* warning: addr must be aligned. The ram page is not masked as dirty
3377 and the code inside is not invalidated. It is useful if the dirty
3378 bits are used to track modified PTEs */
3379 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3380 MemTxAttrs attrs, MemTxResult *result)
3382 uint8_t *ptr;
3383 MemoryRegion *mr;
3384 hwaddr l = 4;
3385 hwaddr addr1;
3386 MemTxResult r;
3387 uint8_t dirty_log_mask;
3388 bool release_lock = false;
3390 rcu_read_lock();
3391 mr = address_space_translate(as, addr, &addr1, &l,
3392 true);
3393 if (l < 4 || !memory_access_is_direct(mr, true)) {
3394 release_lock |= prepare_mmio_access(mr);
3396 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3397 } else {
3398 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3399 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3400 stl_p(ptr, val);
3402 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3403 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3404 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3405 r = MEMTX_OK;
3407 if (result) {
3408 *result = r;
3410 if (release_lock) {
3411 qemu_mutex_unlock_iothread();
3413 rcu_read_unlock();
3416 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3418 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3421 /* warning: addr must be aligned */
3422 static inline void address_space_stl_internal(AddressSpace *as,
3423 hwaddr addr, uint32_t val,
3424 MemTxAttrs attrs,
3425 MemTxResult *result,
3426 enum device_endian endian)
3428 uint8_t *ptr;
3429 MemoryRegion *mr;
3430 hwaddr l = 4;
3431 hwaddr addr1;
3432 MemTxResult r;
3433 bool release_lock = false;
3435 rcu_read_lock();
3436 mr = address_space_translate(as, addr, &addr1, &l,
3437 true);
3438 if (l < 4 || !memory_access_is_direct(mr, true)) {
3439 release_lock |= prepare_mmio_access(mr);
3441 #if defined(TARGET_WORDS_BIGENDIAN)
3442 if (endian == DEVICE_LITTLE_ENDIAN) {
3443 val = bswap32(val);
3445 #else
3446 if (endian == DEVICE_BIG_ENDIAN) {
3447 val = bswap32(val);
3449 #endif
3450 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3451 } else {
3452 /* RAM case */
3453 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3454 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3455 switch (endian) {
3456 case DEVICE_LITTLE_ENDIAN:
3457 stl_le_p(ptr, val);
3458 break;
3459 case DEVICE_BIG_ENDIAN:
3460 stl_be_p(ptr, val);
3461 break;
3462 default:
3463 stl_p(ptr, val);
3464 break;
3466 invalidate_and_set_dirty(mr, addr1, 4);
3467 r = MEMTX_OK;
3469 if (result) {
3470 *result = r;
3472 if (release_lock) {
3473 qemu_mutex_unlock_iothread();
3475 rcu_read_unlock();
3478 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3479 MemTxAttrs attrs, MemTxResult *result)
3481 address_space_stl_internal(as, addr, val, attrs, result,
3482 DEVICE_NATIVE_ENDIAN);
3485 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3486 MemTxAttrs attrs, MemTxResult *result)
3488 address_space_stl_internal(as, addr, val, attrs, result,
3489 DEVICE_LITTLE_ENDIAN);
3492 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3493 MemTxAttrs attrs, MemTxResult *result)
3495 address_space_stl_internal(as, addr, val, attrs, result,
3496 DEVICE_BIG_ENDIAN);
3499 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3501 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3504 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3506 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3509 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3511 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3514 /* XXX: optimize */
3515 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3516 MemTxAttrs attrs, MemTxResult *result)
3518 uint8_t v = val;
3519 MemTxResult r;
3521 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3522 if (result) {
3523 *result = r;
3527 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3529 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3532 /* warning: addr must be aligned */
3533 static inline void address_space_stw_internal(AddressSpace *as,
3534 hwaddr addr, uint32_t val,
3535 MemTxAttrs attrs,
3536 MemTxResult *result,
3537 enum device_endian endian)
3539 uint8_t *ptr;
3540 MemoryRegion *mr;
3541 hwaddr l = 2;
3542 hwaddr addr1;
3543 MemTxResult r;
3544 bool release_lock = false;
3546 rcu_read_lock();
3547 mr = address_space_translate(as, addr, &addr1, &l, true);
3548 if (l < 2 || !memory_access_is_direct(mr, true)) {
3549 release_lock |= prepare_mmio_access(mr);
3551 #if defined(TARGET_WORDS_BIGENDIAN)
3552 if (endian == DEVICE_LITTLE_ENDIAN) {
3553 val = bswap16(val);
3555 #else
3556 if (endian == DEVICE_BIG_ENDIAN) {
3557 val = bswap16(val);
3559 #endif
3560 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3561 } else {
3562 /* RAM case */
3563 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3564 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3565 switch (endian) {
3566 case DEVICE_LITTLE_ENDIAN:
3567 stw_le_p(ptr, val);
3568 break;
3569 case DEVICE_BIG_ENDIAN:
3570 stw_be_p(ptr, val);
3571 break;
3572 default:
3573 stw_p(ptr, val);
3574 break;
3576 invalidate_and_set_dirty(mr, addr1, 2);
3577 r = MEMTX_OK;
3579 if (result) {
3580 *result = r;
3582 if (release_lock) {
3583 qemu_mutex_unlock_iothread();
3585 rcu_read_unlock();
3588 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3589 MemTxAttrs attrs, MemTxResult *result)
3591 address_space_stw_internal(as, addr, val, attrs, result,
3592 DEVICE_NATIVE_ENDIAN);
3595 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3596 MemTxAttrs attrs, MemTxResult *result)
3598 address_space_stw_internal(as, addr, val, attrs, result,
3599 DEVICE_LITTLE_ENDIAN);
3602 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3603 MemTxAttrs attrs, MemTxResult *result)
3605 address_space_stw_internal(as, addr, val, attrs, result,
3606 DEVICE_BIG_ENDIAN);
3609 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3611 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3614 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3616 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3619 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3621 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3624 /* XXX: optimize */
3625 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3626 MemTxAttrs attrs, MemTxResult *result)
3628 MemTxResult r;
3629 val = tswap64(val);
3630 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3631 if (result) {
3632 *result = r;
3636 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3637 MemTxAttrs attrs, MemTxResult *result)
3639 MemTxResult r;
3640 val = cpu_to_le64(val);
3641 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3642 if (result) {
3643 *result = r;
3646 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3647 MemTxAttrs attrs, MemTxResult *result)
3649 MemTxResult r;
3650 val = cpu_to_be64(val);
3651 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3652 if (result) {
3653 *result = r;
3657 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3659 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3662 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3664 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3667 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3669 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3672 /* virtual memory access for debug (includes writing to ROM) */
3673 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3674 uint8_t *buf, int len, int is_write)
3676 int l;
3677 hwaddr phys_addr;
3678 target_ulong page;
3680 while (len > 0) {
3681 int asidx;
3682 MemTxAttrs attrs;
3684 page = addr & TARGET_PAGE_MASK;
3685 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3686 asidx = cpu_asidx_from_attrs(cpu, attrs);
3687 /* if no physical page mapped, return an error */
3688 if (phys_addr == -1)
3689 return -1;
3690 l = (page + TARGET_PAGE_SIZE) - addr;
3691 if (l > len)
3692 l = len;
3693 phys_addr += (addr & ~TARGET_PAGE_MASK);
3694 if (is_write) {
3695 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3696 phys_addr, buf, l);
3697 } else {
3698 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3699 MEMTXATTRS_UNSPECIFIED,
3700 buf, l, 0);
3702 len -= l;
3703 buf += l;
3704 addr += l;
3706 return 0;
3710 * Allows code that needs to deal with migration bitmaps etc to still be built
3711 * target independent.
3713 size_t qemu_target_page_bits(void)
3715 return TARGET_PAGE_BITS;
3718 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined, false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3734 #ifndef CONFIG_USER_ONLY
3735 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3737 MemoryRegion*mr;
3738 hwaddr l = 1;
3739 bool res;
3741 rcu_read_lock();
3742 mr = address_space_translate(&address_space_memory,
3743 phys_addr, &phys_addr, &l, false);
3745 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3746 rcu_read_unlock();
3747 return res;
3750 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3752 RAMBlock *block;
3753 int ret = 0;
3755 rcu_read_lock();
3756 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3757 ret = func(block->idstr, block->host, block->offset,
3758 block->used_length, opaque);
3759 if (ret) {
3760 break;
3763 rcu_read_unlock();
3764 return ret;
3766 #endif