Revert "spapr: Ensure CPU cores are added contiguously and removed in LIFO order"
[qemu.git] / exec.c
blob50e3ee237cf1fff42dbd00ab53f5ca7152fc2ede
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include "qemu.h"
40 #else /* !CONFIG_USER_ONLY */
41 #include "hw/hw.h"
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57 #include "exec/log.h"
59 #include "migration/vmstate.h"
61 #include "qemu/range.h"
62 #ifndef _WIN32
63 #include "qemu/mmap-alloc.h"
64 #endif
66 //#define DEBUG_SUBPAGE
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock. */
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots. */
92 #define RAM_RESIZEABLE (1 << 2)
94 #endif
96 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
97 /* current CPU in the current thread. It is only valid inside
98 cpu_exec() */
99 __thread CPUState *current_cpu;
100 /* 0 = Do not count executed instructions.
101 1 = Precise instruction counting.
102 2 = Adaptive rate instruction counting. */
103 int use_icount;
105 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the multi-level physical page map (32 bits in total). */
struct PhysPageEntry {
    /* How many levels to skip to reach the next node (in units of
     * L2_SIZE). 0 for a leaf.
     */
    uint32_t skip : 6;
    /* Index into phys_sections when skip == 0, otherwise index into
     * phys_map_nodes.
     */
    uint32_t ptr : 26;
};
116 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
118 /* Size of the L2 (and L3, etc) page tables. */
119 #define ADDR_SPACE_BITS 64
121 #define P_L2_BITS 9
122 #define P_L2_SIZE (1 << P_L2_BITS)
124 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
126 typedef PhysPageEntry Node[P_L2_SIZE];
128 typedef struct PhysPageMap {
129 struct rcu_head rcu;
131 unsigned sections_nb;
132 unsigned sections_nb_alloc;
133 unsigned nodes_nb;
134 unsigned nodes_nb_alloc;
135 Node *nodes;
136 MemoryRegionSection *sections;
137 } PhysPageMap;
139 struct AddressSpaceDispatch {
140 struct rcu_head rcu;
142 MemoryRegionSection *mru_section;
143 /* This is a multi-level map on the physical address space.
144 * The bottom level has pointers to MemoryRegionSections.
146 PhysPageEntry phys_map;
147 PhysPageMap map;
148 AddressSpace *as;
151 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
152 typedef struct subpage_t {
153 MemoryRegion iomem;
154 AddressSpace *as;
155 hwaddr base;
156 uint16_t sub_section[TARGET_PAGE_SIZE];
157 } subpage_t;
159 #define PHYS_SECTION_UNASSIGNED 0
160 #define PHYS_SECTION_NOTDIRTY 1
161 #define PHYS_SECTION_ROM 2
162 #define PHYS_SECTION_WATCH 3
164 static void io_mem_init(void);
165 static void memory_map_init(void);
166 static void tcg_commit(MemoryListener *listener);
168 static MemoryRegion io_mem_watch;
171 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
172 * @cpu: the CPU whose AddressSpace this is
173 * @as: the AddressSpace itself
174 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
175 * @tcg_as_listener: listener for tracking changes to the AddressSpace
177 struct CPUAddressSpace {
178 CPUState *cpu;
179 AddressSpace *as;
180 struct AddressSpaceDispatch *memory_dispatch;
181 MemoryListener tcg_as_listener;
184 #endif
186 #if !defined(CONFIG_USER_ONLY)
188 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
190 static unsigned alloc_hint = 16;
191 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
192 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
193 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
194 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
195 alloc_hint = map->nodes_nb_alloc;
199 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
201 unsigned i;
202 uint32_t ret;
203 PhysPageEntry e;
204 PhysPageEntry *p;
206 ret = map->nodes_nb++;
207 p = map->nodes[ret];
208 assert(ret != PHYS_MAP_NODE_NIL);
209 assert(ret != map->nodes_nb_alloc);
211 e.skip = leaf ? 0 : 1;
212 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
213 for (i = 0; i < P_L2_SIZE; ++i) {
214 memcpy(&p[i], &e, sizeof(e));
216 return ret;
219 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
220 hwaddr *index, hwaddr *nb, uint16_t leaf,
221 int level)
223 PhysPageEntry *p;
224 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
226 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
227 lp->ptr = phys_map_node_alloc(map, level == 0);
229 p = map->nodes[lp->ptr];
230 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
232 while (*nb && lp < &p[P_L2_SIZE]) {
233 if ((*index & (step - 1)) == 0 && *nb >= step) {
234 lp->skip = 0;
235 lp->ptr = leaf;
236 *index += step;
237 *nb -= step;
238 } else {
239 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
241 ++lp;
245 static void phys_page_set(AddressSpaceDispatch *d,
246 hwaddr index, hwaddr nb,
247 uint16_t leaf)
249 /* Wildly overreserve - it doesn't matter much. */
250 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
252 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
255 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
256 * and update our entry so we can skip it and go directly to the destination.
258 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
260 unsigned valid_ptr = P_L2_SIZE;
261 int valid = 0;
262 PhysPageEntry *p;
263 int i;
265 if (lp->ptr == PHYS_MAP_NODE_NIL) {
266 return;
269 p = nodes[lp->ptr];
270 for (i = 0; i < P_L2_SIZE; i++) {
271 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
272 continue;
275 valid_ptr = i;
276 valid++;
277 if (p[i].skip) {
278 phys_page_compact(&p[i], nodes, compacted);
282 /* We can only compress if there's only one child. */
283 if (valid != 1) {
284 return;
287 assert(valid_ptr < P_L2_SIZE);
289 /* Don't compress if it won't fit in the # of bits we have. */
290 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
291 return;
294 lp->ptr = p[valid_ptr].ptr;
295 if (!p[valid_ptr].skip) {
296 /* If our only child is a leaf, make this a leaf. */
297 /* By design, we should have made this node a leaf to begin with so we
298 * should never reach here.
299 * But since it's so simple to handle this, let's do it just in case we
300 * change this rule.
302 lp->skip = 0;
303 } else {
304 lp->skip += p[valid_ptr].skip;
308 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
310 DECLARE_BITMAP(compacted, nodes_nb);
312 if (d->phys_map.skip) {
313 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
317 static inline bool section_covers_addr(const MemoryRegionSection *section,
318 hwaddr addr)
320 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
321 * the section must cover the entire address space.
323 return section->size.hi ||
324 range_covers_byte(section->offset_within_address_space,
325 section->size.lo, addr);
328 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
329 Node *nodes, MemoryRegionSection *sections)
331 PhysPageEntry *p;
332 hwaddr index = addr >> TARGET_PAGE_BITS;
333 int i;
335 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
336 if (lp.ptr == PHYS_MAP_NODE_NIL) {
337 return &sections[PHYS_SECTION_UNASSIGNED];
339 p = nodes[lp.ptr];
340 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
343 if (section_covers_addr(&sections[lp.ptr], addr)) {
344 return &sections[lp.ptr];
345 } else {
346 return &sections[PHYS_SECTION_UNASSIGNED];
350 bool memory_region_is_unassigned(MemoryRegion *mr)
352 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
353 && mr != &io_mem_watch;
356 /* Called from RCU critical section */
357 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
358 hwaddr addr,
359 bool resolve_subpage)
361 MemoryRegionSection *section = atomic_read(&d->mru_section);
362 subpage_t *subpage;
363 bool update;
365 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
366 section_covers_addr(section, addr)) {
367 update = false;
368 } else {
369 section = phys_page_find(d->phys_map, addr, d->map.nodes,
370 d->map.sections);
371 update = true;
373 if (resolve_subpage && section->mr->subpage) {
374 subpage = container_of(section->mr, subpage_t, iomem);
375 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
377 if (update) {
378 atomic_set(&d->mru_section, section);
380 return section;
383 /* Called from RCU critical section */
384 static MemoryRegionSection *
385 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
386 hwaddr *plen, bool resolve_subpage)
388 MemoryRegionSection *section;
389 MemoryRegion *mr;
390 Int128 diff;
392 section = address_space_lookup_region(d, addr, resolve_subpage);
393 /* Compute offset within MemoryRegionSection */
394 addr -= section->offset_within_address_space;
396 /* Compute offset within MemoryRegion */
397 *xlat = addr + section->offset_within_region;
399 mr = section->mr;
401 /* MMIO registers can be expected to perform full-width accesses based only
402 * on their address, without considering adjacent registers that could
403 * decode to completely different MemoryRegions. When such registers
404 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
405 * regions overlap wildly. For this reason we cannot clamp the accesses
406 * here.
408 * If the length is small (as is the case for address_space_ldl/stl),
409 * everything works fine. If the incoming length is large, however,
410 * the caller really has to do the clamping through memory_access_size.
412 if (memory_region_is_ram(mr)) {
413 diff = int128_sub(section->size, int128_make64(addr));
414 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
416 return section;
419 /* Called from RCU critical section */
420 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
421 hwaddr *xlat, hwaddr *plen,
422 bool is_write)
424 IOMMUTLBEntry iotlb;
425 MemoryRegionSection *section;
426 MemoryRegion *mr;
428 for (;;) {
429 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
430 section = address_space_translate_internal(d, addr, &addr, plen, true);
431 mr = section->mr;
433 if (!mr->iommu_ops) {
434 break;
437 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
438 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
439 | (addr & iotlb.addr_mask));
440 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
441 if (!(iotlb.perm & (1 << is_write))) {
442 mr = &io_mem_unassigned;
443 break;
446 as = iotlb.target_as;
449 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
450 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
451 *plen = MIN(page, *plen);
454 *xlat = addr;
455 return mr;
458 /* Called from RCU critical section */
459 MemoryRegionSection *
460 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
461 hwaddr *xlat, hwaddr *plen)
463 MemoryRegionSection *section;
464 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
466 section = address_space_translate_internal(d, addr, xlat, plen, false);
468 assert(!section->mr->iommu_ops);
469 return section;
471 #endif
473 #if !defined(CONFIG_USER_ONLY)
475 static int cpu_common_post_load(void *opaque, int version_id)
477 CPUState *cpu = opaque;
479 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
480 version_id is increased. */
481 cpu->interrupt_request &= ~0x01;
482 tlb_flush(cpu, 1);
484 return 0;
487 static int cpu_common_pre_load(void *opaque)
489 CPUState *cpu = opaque;
491 cpu->exception_index = -1;
493 return 0;
496 static bool cpu_common_exception_index_needed(void *opaque)
498 CPUState *cpu = opaque;
500 return tcg_enabled() && cpu->exception_index != -1;
503 static const VMStateDescription vmstate_cpu_common_exception_index = {
504 .name = "cpu_common/exception_index",
505 .version_id = 1,
506 .minimum_version_id = 1,
507 .needed = cpu_common_exception_index_needed,
508 .fields = (VMStateField[]) {
509 VMSTATE_INT32(exception_index, CPUState),
510 VMSTATE_END_OF_LIST()
514 static bool cpu_common_crash_occurred_needed(void *opaque)
516 CPUState *cpu = opaque;
518 return cpu->crash_occurred;
521 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
522 .name = "cpu_common/crash_occurred",
523 .version_id = 1,
524 .minimum_version_id = 1,
525 .needed = cpu_common_crash_occurred_needed,
526 .fields = (VMStateField[]) {
527 VMSTATE_BOOL(crash_occurred, CPUState),
528 VMSTATE_END_OF_LIST()
532 const VMStateDescription vmstate_cpu_common = {
533 .name = "cpu_common",
534 .version_id = 1,
535 .minimum_version_id = 1,
536 .pre_load = cpu_common_pre_load,
537 .post_load = cpu_common_post_load,
538 .fields = (VMStateField[]) {
539 VMSTATE_UINT32(halted, CPUState),
540 VMSTATE_UINT32(interrupt_request, CPUState),
541 VMSTATE_END_OF_LIST()
543 .subsections = (const VMStateDescription*[]) {
544 &vmstate_cpu_common_exception_index,
545 &vmstate_cpu_common_crash_occurred,
546 NULL
550 #endif
552 CPUState *qemu_get_cpu(int index)
554 CPUState *cpu;
556 CPU_FOREACH(cpu) {
557 if (cpu->cpu_index == index) {
558 return cpu;
562 return NULL;
565 #if !defined(CONFIG_USER_ONLY)
566 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
568 CPUAddressSpace *newas;
570 /* Target code should have set num_ases before calling us */
571 assert(asidx < cpu->num_ases);
573 if (asidx == 0) {
574 /* address space 0 gets the convenience alias */
575 cpu->as = as;
578 /* KVM cannot currently support multiple address spaces. */
579 assert(asidx == 0 || !kvm_enabled());
581 if (!cpu->cpu_ases) {
582 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
585 newas = &cpu->cpu_ases[asidx];
586 newas->cpu = cpu;
587 newas->as = as;
588 if (tcg_enabled()) {
589 newas->tcg_as_listener.commit = tcg_commit;
590 memory_listener_register(&newas->tcg_as_listener, as);
594 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
596 /* Return the AddressSpace corresponding to the specified index */
597 return cpu->cpu_ases[asidx].as;
599 #endif
601 static int cpu_get_free_index(void)
603 CPUState *some_cpu;
604 int cpu_index = 0;
606 CPU_FOREACH(some_cpu) {
607 cpu_index++;
609 return cpu_index;
612 void cpu_exec_exit(CPUState *cpu)
614 CPUClass *cc = CPU_GET_CLASS(cpu);
616 cpu_list_lock();
617 if (cpu->node.tqe_prev == NULL) {
618 /* there is nothing to undo since cpu_exec_init() hasn't been called */
619 cpu_list_unlock();
620 return;
623 QTAILQ_REMOVE(&cpus, cpu, node);
624 cpu->node.tqe_prev = NULL;
625 cpu->cpu_index = UNASSIGNED_CPU_INDEX;
626 cpu_list_unlock();
628 if (cc->vmsd != NULL) {
629 vmstate_unregister(NULL, cc->vmsd, cpu);
631 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
632 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
636 void cpu_exec_init(CPUState *cpu, Error **errp)
638 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
639 Error *local_err ATTRIBUTE_UNUSED = NULL;
641 cpu->as = NULL;
642 cpu->num_ases = 0;
644 #ifndef CONFIG_USER_ONLY
645 cpu->thread_id = qemu_get_thread_id();
647 /* This is a softmmu CPU object, so create a property for it
648 * so users can wire up its memory. (This can't go in qom/cpu.c
649 * because that file is compiled only once for both user-mode
650 * and system builds.) The default if no link is set up is to use
651 * the system address space.
653 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
654 (Object **)&cpu->memory,
655 qdev_prop_allow_set_link_before_realize,
656 OBJ_PROP_LINK_UNREF_ON_RELEASE,
657 &error_abort);
658 cpu->memory = system_memory;
659 object_ref(OBJECT(cpu->memory));
660 #endif
662 cpu_list_lock();
663 if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) {
664 cpu->cpu_index = cpu_get_free_index();
665 assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
667 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
668 cpu_list_unlock();
670 #ifndef CONFIG_USER_ONLY
671 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
672 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
674 if (cc->vmsd != NULL) {
675 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
677 #endif
680 #if defined(CONFIG_USER_ONLY)
681 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
683 tb_invalidate_phys_page_range(pc, pc + 1, 0);
685 #else
686 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
688 MemTxAttrs attrs;
689 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
690 int asidx = cpu_asidx_from_attrs(cpu, attrs);
691 if (phys != -1) {
692 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
693 phys | (pc & ~TARGET_PAGE_MASK));
696 #endif
698 #if defined(CONFIG_USER_ONLY)
699 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
704 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
705 int flags)
707 return -ENOSYS;
710 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
714 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
715 int flags, CPUWatchpoint **watchpoint)
717 return -ENOSYS;
719 #else
720 /* Add a watchpoint. */
721 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
722 int flags, CPUWatchpoint **watchpoint)
724 CPUWatchpoint *wp;
726 /* forbid ranges which are empty or run off the end of the address space */
727 if (len == 0 || (addr + len - 1) < addr) {
728 error_report("tried to set invalid watchpoint at %"
729 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
730 return -EINVAL;
732 wp = g_malloc(sizeof(*wp));
734 wp->vaddr = addr;
735 wp->len = len;
736 wp->flags = flags;
738 /* keep all GDB-injected watchpoints in front */
739 if (flags & BP_GDB) {
740 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
741 } else {
742 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
745 tlb_flush_page(cpu, addr);
747 if (watchpoint)
748 *watchpoint = wp;
749 return 0;
752 /* Remove a specific watchpoint. */
753 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
754 int flags)
756 CPUWatchpoint *wp;
758 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
759 if (addr == wp->vaddr && len == wp->len
760 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
761 cpu_watchpoint_remove_by_ref(cpu, wp);
762 return 0;
765 return -ENOENT;
768 /* Remove a specific watchpoint by reference. */
769 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
771 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
773 tlb_flush_page(cpu, watchpoint->vaddr);
775 g_free(watchpoint);
778 /* Remove all matching watchpoints. */
779 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
781 CPUWatchpoint *wp, *next;
783 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
784 if (wp->flags & mask) {
785 cpu_watchpoint_remove_by_ref(cpu, wp);
790 /* Return true if this watchpoint address matches the specified
791 * access (ie the address range covered by the watchpoint overlaps
792 * partially or completely with the address range covered by the
793 * access).
795 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
796 vaddr addr,
797 vaddr len)
799 /* We know the lengths are non-zero, but a little caution is
800 * required to avoid errors in the case where the range ends
801 * exactly at the top of the address space and so addr + len
802 * wraps round to zero.
804 vaddr wpend = wp->vaddr + wp->len - 1;
805 vaddr addrend = addr + len - 1;
807 return !(addr > wpend || wp->vaddr > addrend);
810 #endif
812 /* Add a breakpoint. */
813 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
814 CPUBreakpoint **breakpoint)
816 CPUBreakpoint *bp;
818 bp = g_malloc(sizeof(*bp));
820 bp->pc = pc;
821 bp->flags = flags;
823 /* keep all GDB-injected breakpoints in front */
824 if (flags & BP_GDB) {
825 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
826 } else {
827 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
830 breakpoint_invalidate(cpu, pc);
832 if (breakpoint) {
833 *breakpoint = bp;
835 return 0;
838 /* Remove a specific breakpoint. */
839 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
841 CPUBreakpoint *bp;
843 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
844 if (bp->pc == pc && bp->flags == flags) {
845 cpu_breakpoint_remove_by_ref(cpu, bp);
846 return 0;
849 return -ENOENT;
852 /* Remove a specific breakpoint by reference. */
853 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
855 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
857 breakpoint_invalidate(cpu, breakpoint->pc);
859 g_free(breakpoint);
862 /* Remove all matching breakpoints. */
863 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
865 CPUBreakpoint *bp, *next;
867 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
868 if (bp->flags & mask) {
869 cpu_breakpoint_remove_by_ref(cpu, bp);
874 /* enable or disable single step mode. EXCP_DEBUG is returned by the
875 CPU loop after each instruction */
876 void cpu_single_step(CPUState *cpu, int enabled)
878 if (cpu->singlestep_enabled != enabled) {
879 cpu->singlestep_enabled = enabled;
880 if (kvm_enabled()) {
881 kvm_update_guest_debug(cpu, 0);
882 } else {
883 /* must flush all the translated code to avoid inconsistencies */
884 /* XXX: only flush what is necessary */
885 tb_flush(cpu);
890 void cpu_abort(CPUState *cpu, const char *fmt, ...)
892 va_list ap;
893 va_list ap2;
895 va_start(ap, fmt);
896 va_copy(ap2, ap);
897 fprintf(stderr, "qemu: fatal: ");
898 vfprintf(stderr, fmt, ap);
899 fprintf(stderr, "\n");
900 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
901 if (qemu_log_separate()) {
902 qemu_log("qemu: fatal: ");
903 qemu_log_vprintf(fmt, ap2);
904 qemu_log("\n");
905 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
906 qemu_log_flush();
907 qemu_log_close();
909 va_end(ap2);
910 va_end(ap);
911 replay_finish();
912 #if defined(CONFIG_USER_ONLY)
914 struct sigaction act;
915 sigfillset(&act.sa_mask);
916 act.sa_handler = SIG_DFL;
917 sigaction(SIGABRT, &act, NULL);
919 #endif
920 abort();
923 #if !defined(CONFIG_USER_ONLY)
924 /* Called from RCU critical section */
925 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
927 RAMBlock *block;
929 block = atomic_rcu_read(&ram_list.mru_block);
930 if (block && addr - block->offset < block->max_length) {
931 return block;
933 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
934 if (addr - block->offset < block->max_length) {
935 goto found;
939 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
940 abort();
942 found:
943 /* It is safe to write mru_block outside the iothread lock. This
944 * is what happens:
946 * mru_block = xxx
947 * rcu_read_unlock()
948 * xxx removed from list
949 * rcu_read_lock()
950 * read mru_block
951 * mru_block = NULL;
952 * call_rcu(reclaim_ramblock, xxx);
953 * rcu_read_unlock()
955 * atomic_rcu_set is not needed here. The block was already published
956 * when it was placed into the list. Here we're just making an extra
957 * copy of the pointer.
959 ram_list.mru_block = block;
960 return block;
963 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
965 CPUState *cpu;
966 ram_addr_t start1;
967 RAMBlock *block;
968 ram_addr_t end;
970 end = TARGET_PAGE_ALIGN(start + length);
971 start &= TARGET_PAGE_MASK;
973 rcu_read_lock();
974 block = qemu_get_ram_block(start);
975 assert(block == qemu_get_ram_block(end - 1));
976 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
977 CPU_FOREACH(cpu) {
978 tlb_reset_dirty(cpu, start1, length);
980 rcu_read_unlock();
983 /* Note: start and end must be within the same ram block. */
984 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
985 ram_addr_t length,
986 unsigned client)
988 DirtyMemoryBlocks *blocks;
989 unsigned long end, page;
990 bool dirty = false;
992 if (length == 0) {
993 return false;
996 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
997 page = start >> TARGET_PAGE_BITS;
999 rcu_read_lock();
1001 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1003 while (page < end) {
1004 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1005 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1006 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1008 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1009 offset, num);
1010 page += num;
1013 rcu_read_unlock();
1015 if (dirty && tcg_enabled()) {
1016 tlb_reset_dirty_range_all(start, length);
1019 return dirty;
1022 /* Called from RCU critical section */
1023 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1024 MemoryRegionSection *section,
1025 target_ulong vaddr,
1026 hwaddr paddr, hwaddr xlat,
1027 int prot,
1028 target_ulong *address)
1030 hwaddr iotlb;
1031 CPUWatchpoint *wp;
1033 if (memory_region_is_ram(section->mr)) {
1034 /* Normal RAM. */
1035 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1036 if (!section->readonly) {
1037 iotlb |= PHYS_SECTION_NOTDIRTY;
1038 } else {
1039 iotlb |= PHYS_SECTION_ROM;
1041 } else {
1042 AddressSpaceDispatch *d;
1044 d = atomic_rcu_read(&section->address_space->dispatch);
1045 iotlb = section - d->map.sections;
1046 iotlb += xlat;
1049 /* Make accesses to pages with watchpoints go via the
1050 watchpoint trap routines. */
1051 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1052 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1053 /* Avoid trapping reads of pages with a write breakpoint. */
1054 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1055 iotlb = PHYS_SECTION_WATCH + paddr;
1056 *address |= TLB_MMIO;
1057 break;
1062 return iotlb;
1064 #endif /* !defined(CONFIG_USER_ONLY) */
1066 #if !defined(CONFIG_USER_ONLY)
1068 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1069 uint16_t section);
1070 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1072 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1073 qemu_anon_ram_alloc;
1076 * Set a custom physical guest memory allocator.
1077 * Accelerators with unusual needs may need this. Hopefully, we can
1078 * get rid of it eventually.
1080 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1082 phys_mem_alloc = alloc;
1085 static uint16_t phys_section_add(PhysPageMap *map,
1086 MemoryRegionSection *section)
1088 /* The physical section number is ORed with a page-aligned
1089 * pointer to produce the iotlb entries. Thus it should
1090 * never overflow into the page-aligned value.
1092 assert(map->sections_nb < TARGET_PAGE_SIZE);
1094 if (map->sections_nb == map->sections_nb_alloc) {
1095 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1096 map->sections = g_renew(MemoryRegionSection, map->sections,
1097 map->sections_nb_alloc);
1099 map->sections[map->sections_nb] = *section;
1100 memory_region_ref(section->mr);
1101 return map->sections_nb++;
1104 static void phys_section_destroy(MemoryRegion *mr)
1106 bool have_sub_page = mr->subpage;
1108 memory_region_unref(mr);
1110 if (have_sub_page) {
1111 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1112 object_unref(OBJECT(&subpage->iomem));
1113 g_free(subpage);
1117 static void phys_sections_free(PhysPageMap *map)
1119 while (map->sections_nb > 0) {
1120 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1121 phys_section_destroy(section->mr);
1123 g_free(map->sections);
1124 g_free(map->nodes);
1127 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1129 subpage_t *subpage;
1130 hwaddr base = section->offset_within_address_space
1131 & TARGET_PAGE_MASK;
1132 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1133 d->map.nodes, d->map.sections);
1134 MemoryRegionSection subsection = {
1135 .offset_within_address_space = base,
1136 .size = int128_make64(TARGET_PAGE_SIZE),
1138 hwaddr start, end;
1140 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1142 if (!(existing->mr->subpage)) {
1143 subpage = subpage_init(d->as, base);
1144 subsection.address_space = d->as;
1145 subsection.mr = &subpage->iomem;
1146 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1147 phys_section_add(&d->map, &subsection));
1148 } else {
1149 subpage = container_of(existing->mr, subpage_t, iomem);
1151 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1152 end = start + int128_get64(section->size) - 1;
1153 subpage_register(subpage, start, end,
1154 phys_section_add(&d->map, section));
1158 static void register_multipage(AddressSpaceDispatch *d,
1159 MemoryRegionSection *section)
1161 hwaddr start_addr = section->offset_within_address_space;
1162 uint16_t section_index = phys_section_add(&d->map, section);
1163 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1164 TARGET_PAGE_BITS));
1166 assert(num_pages);
1167 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1170 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1172 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1173 AddressSpaceDispatch *d = as->next_dispatch;
1174 MemoryRegionSection now = *section, remain = *section;
1175 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1177 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1178 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1179 - now.offset_within_address_space;
1181 now.size = int128_min(int128_make64(left), now.size);
1182 register_subpage(d, &now);
1183 } else {
1184 now.size = int128_zero();
1186 while (int128_ne(remain.size, now.size)) {
1187 remain.size = int128_sub(remain.size, now.size);
1188 remain.offset_within_address_space += int128_get64(now.size);
1189 remain.offset_within_region += int128_get64(now.size);
1190 now = remain;
1191 if (int128_lt(remain.size, page_size)) {
1192 register_subpage(d, &now);
1193 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1194 now.size = page_size;
1195 register_subpage(d, &now);
1196 } else {
1197 now.size = int128_and(now.size, int128_neg(page_size));
1198 register_multipage(d, &now);
/* Ask KVM to flush its coalesced MMIO buffer; a no-op when KVM is not in use. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1209 void qemu_mutex_lock_ramlist(void)
1211 qemu_mutex_lock(&ram_list.mutex);
1214 void qemu_mutex_unlock_ramlist(void)
1216 qemu_mutex_unlock(&ram_list.mutex);
1219 #ifdef __linux__
1220 static void *file_ram_alloc(RAMBlock *block,
1221 ram_addr_t memory,
1222 const char *path,
1223 Error **errp)
1225 bool unlink_on_error = false;
1226 char *filename;
1227 char *sanitized_name;
1228 char *c;
1229 void *area;
1230 int fd = -1;
1231 int64_t page_size;
1233 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1234 error_setg(errp,
1235 "host lacks kvm mmu notifiers, -mem-path unsupported");
1236 return NULL;
1239 for (;;) {
1240 fd = open(path, O_RDWR);
1241 if (fd >= 0) {
1242 /* @path names an existing file, use it */
1243 break;
1245 if (errno == ENOENT) {
1246 /* @path names a file that doesn't exist, create it */
1247 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1248 if (fd >= 0) {
1249 unlink_on_error = true;
1250 break;
1252 } else if (errno == EISDIR) {
1253 /* @path names a directory, create a file there */
1254 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1255 sanitized_name = g_strdup(memory_region_name(block->mr));
1256 for (c = sanitized_name; *c != '\0'; c++) {
1257 if (*c == '/') {
1258 *c = '_';
1262 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1263 sanitized_name);
1264 g_free(sanitized_name);
1266 fd = mkstemp(filename);
1267 if (fd >= 0) {
1268 unlink(filename);
1269 g_free(filename);
1270 break;
1272 g_free(filename);
1274 if (errno != EEXIST && errno != EINTR) {
1275 error_setg_errno(errp, errno,
1276 "can't open backing store %s for guest RAM",
1277 path);
1278 goto error;
1281 * Try again on EINTR and EEXIST. The latter happens when
1282 * something else creates the file between our two open().
1286 page_size = qemu_fd_getpagesize(fd);
1287 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1289 if (memory < page_size) {
1290 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1291 "or larger than page size 0x%" PRIx64,
1292 memory, page_size);
1293 goto error;
1296 memory = ROUND_UP(memory, page_size);
1299 * ftruncate is not supported by hugetlbfs in older
1300 * hosts, so don't bother bailing out on errors.
1301 * If anything goes wrong with it under other filesystems,
1302 * mmap will fail.
1304 if (ftruncate(fd, memory)) {
1305 perror("ftruncate");
1308 area = qemu_ram_mmap(fd, memory, block->mr->align,
1309 block->flags & RAM_SHARED);
1310 if (area == MAP_FAILED) {
1311 error_setg_errno(errp, errno,
1312 "unable to map backing store for guest RAM");
1313 goto error;
1316 if (mem_prealloc) {
1317 os_mem_prealloc(fd, area, memory);
1320 block->fd = fd;
1321 return area;
1323 error:
1324 if (unlink_on_error) {
1325 unlink(path);
1327 if (fd != -1) {
1328 close(fd);
1330 return NULL;
1332 #endif
1334 /* Called with the ramlist lock held. */
1335 static ram_addr_t find_ram_offset(ram_addr_t size)
1337 RAMBlock *block, *next_block;
1338 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1340 assert(size != 0); /* it would hand out same offset multiple times */
1342 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1343 return 0;
1346 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1347 ram_addr_t end, next = RAM_ADDR_MAX;
1349 end = block->offset + block->max_length;
1351 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1352 if (next_block->offset >= end) {
1353 next = MIN(next, next_block->offset);
1356 if (next - end >= size && next - end < mingap) {
1357 offset = end;
1358 mingap = next - end;
1362 if (offset == RAM_ADDR_MAX) {
1363 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1364 (uint64_t)size);
1365 abort();
1368 return offset;
1371 ram_addr_t last_ram_offset(void)
1373 RAMBlock *block;
1374 ram_addr_t last = 0;
1376 rcu_read_lock();
1377 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1378 last = MAX(last, block->offset + block->max_length);
1380 rcu_read_unlock();
1381 return last;
1384 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1386 int ret;
1388 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1389 if (!machine_dump_guest_core(current_machine)) {
1390 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1391 if (ret) {
1392 perror("qemu_madvise");
1393 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1394 "but dump_guest_core=off specified\n");
1399 const char *qemu_ram_get_idstr(RAMBlock *rb)
1401 return rb->idstr;
1404 /* Called with iothread lock held. */
1405 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1407 RAMBlock *block;
1409 assert(new_block);
1410 assert(!new_block->idstr[0]);
1412 if (dev) {
1413 char *id = qdev_get_dev_path(dev);
1414 if (id) {
1415 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1416 g_free(id);
1419 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1421 rcu_read_lock();
1422 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1423 if (block != new_block &&
1424 !strcmp(block->idstr, new_block->idstr)) {
1425 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1426 new_block->idstr);
1427 abort();
1430 rcu_read_unlock();
1433 /* Called with iothread lock held. */
1434 void qemu_ram_unset_idstr(RAMBlock *block)
1436 /* FIXME: arch_init.c assumes that this is not called throughout
1437 * migration. Ignore the problem since hot-unplug during migration
1438 * does not work anyway.
1440 if (block) {
1441 memset(block->idstr, 0, sizeof(block->idstr));
1445 static int memory_try_enable_merging(void *addr, size_t len)
1447 if (!machine_mem_merge(current_machine)) {
1448 /* disabled by the user */
1449 return 0;
1452 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1455 /* Only legal before guest might have detected the memory size: e.g. on
1456 * incoming migration, or right after reset.
1458 * As memory core doesn't know how is memory accessed, it is up to
1459 * resize callback to update device state and/or add assertions to detect
1460 * misuse, if necessary.
1462 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1464 assert(block);
1466 newsize = HOST_PAGE_ALIGN(newsize);
1468 if (block->used_length == newsize) {
1469 return 0;
1472 if (!(block->flags & RAM_RESIZEABLE)) {
1473 error_setg_errno(errp, EINVAL,
1474 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1475 " in != 0x" RAM_ADDR_FMT, block->idstr,
1476 newsize, block->used_length);
1477 return -EINVAL;
1480 if (block->max_length < newsize) {
1481 error_setg_errno(errp, EINVAL,
1482 "Length too large: %s: 0x" RAM_ADDR_FMT
1483 " > 0x" RAM_ADDR_FMT, block->idstr,
1484 newsize, block->max_length);
1485 return -EINVAL;
1488 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1489 block->used_length = newsize;
1490 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1491 DIRTY_CLIENTS_ALL);
1492 memory_region_set_size(block->mr, newsize);
1493 if (block->resized) {
1494 block->resized(block->idstr, newsize, block->host);
1496 return 0;
1499 /* Called with ram_list.mutex held */
1500 static void dirty_memory_extend(ram_addr_t old_ram_size,
1501 ram_addr_t new_ram_size)
1503 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1504 DIRTY_MEMORY_BLOCK_SIZE);
1505 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1506 DIRTY_MEMORY_BLOCK_SIZE);
1507 int i;
1509 /* Only need to extend if block count increased */
1510 if (new_num_blocks <= old_num_blocks) {
1511 return;
1514 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1515 DirtyMemoryBlocks *old_blocks;
1516 DirtyMemoryBlocks *new_blocks;
1517 int j;
1519 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1520 new_blocks = g_malloc(sizeof(*new_blocks) +
1521 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1523 if (old_num_blocks) {
1524 memcpy(new_blocks->blocks, old_blocks->blocks,
1525 old_num_blocks * sizeof(old_blocks->blocks[0]));
1528 for (j = old_num_blocks; j < new_num_blocks; j++) {
1529 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1532 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1534 if (old_blocks) {
1535 g_free_rcu(old_blocks, rcu);
1540 static void ram_block_add(RAMBlock *new_block, Error **errp)
1542 RAMBlock *block;
1543 RAMBlock *last_block = NULL;
1544 ram_addr_t old_ram_size, new_ram_size;
1545 Error *err = NULL;
1547 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1549 qemu_mutex_lock_ramlist();
1550 new_block->offset = find_ram_offset(new_block->max_length);
1552 if (!new_block->host) {
1553 if (xen_enabled()) {
1554 xen_ram_alloc(new_block->offset, new_block->max_length,
1555 new_block->mr, &err);
1556 if (err) {
1557 error_propagate(errp, err);
1558 qemu_mutex_unlock_ramlist();
1559 return;
1561 } else {
1562 new_block->host = phys_mem_alloc(new_block->max_length,
1563 &new_block->mr->align);
1564 if (!new_block->host) {
1565 error_setg_errno(errp, errno,
1566 "cannot set up guest memory '%s'",
1567 memory_region_name(new_block->mr));
1568 qemu_mutex_unlock_ramlist();
1569 return;
1571 memory_try_enable_merging(new_block->host, new_block->max_length);
1575 new_ram_size = MAX(old_ram_size,
1576 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1577 if (new_ram_size > old_ram_size) {
1578 migration_bitmap_extend(old_ram_size, new_ram_size);
1579 dirty_memory_extend(old_ram_size, new_ram_size);
1581 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1582 * QLIST (which has an RCU-friendly variant) does not have insertion at
1583 * tail, so save the last element in last_block.
1585 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1586 last_block = block;
1587 if (block->max_length < new_block->max_length) {
1588 break;
1591 if (block) {
1592 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1593 } else if (last_block) {
1594 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1595 } else { /* list is empty */
1596 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1598 ram_list.mru_block = NULL;
1600 /* Write list before version */
1601 smp_wmb();
1602 ram_list.version++;
1603 qemu_mutex_unlock_ramlist();
1605 cpu_physical_memory_set_dirty_range(new_block->offset,
1606 new_block->used_length,
1607 DIRTY_CLIENTS_ALL);
1609 if (new_block->host) {
1610 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1611 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1612 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1613 if (kvm_enabled()) {
1614 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1619 #ifdef __linux__
1620 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1621 bool share, const char *mem_path,
1622 Error **errp)
1624 RAMBlock *new_block;
1625 Error *local_err = NULL;
1627 if (xen_enabled()) {
1628 error_setg(errp, "-mem-path not supported with Xen");
1629 return NULL;
1632 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1634 * file_ram_alloc() needs to allocate just like
1635 * phys_mem_alloc, but we haven't bothered to provide
1636 * a hook there.
1638 error_setg(errp,
1639 "-mem-path not supported with this accelerator");
1640 return NULL;
1643 size = HOST_PAGE_ALIGN(size);
1644 new_block = g_malloc0(sizeof(*new_block));
1645 new_block->mr = mr;
1646 new_block->used_length = size;
1647 new_block->max_length = size;
1648 new_block->flags = share ? RAM_SHARED : 0;
1649 new_block->host = file_ram_alloc(new_block, size,
1650 mem_path, errp);
1651 if (!new_block->host) {
1652 g_free(new_block);
1653 return NULL;
1656 ram_block_add(new_block, &local_err);
1657 if (local_err) {
1658 g_free(new_block);
1659 error_propagate(errp, local_err);
1660 return NULL;
1662 return new_block;
1664 #endif
1666 static
1667 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1668 void (*resized)(const char*,
1669 uint64_t length,
1670 void *host),
1671 void *host, bool resizeable,
1672 MemoryRegion *mr, Error **errp)
1674 RAMBlock *new_block;
1675 Error *local_err = NULL;
1677 size = HOST_PAGE_ALIGN(size);
1678 max_size = HOST_PAGE_ALIGN(max_size);
1679 new_block = g_malloc0(sizeof(*new_block));
1680 new_block->mr = mr;
1681 new_block->resized = resized;
1682 new_block->used_length = size;
1683 new_block->max_length = max_size;
1684 assert(max_size >= size);
1685 new_block->fd = -1;
1686 new_block->host = host;
1687 if (host) {
1688 new_block->flags |= RAM_PREALLOC;
1690 if (resizeable) {
1691 new_block->flags |= RAM_RESIZEABLE;
1693 ram_block_add(new_block, &local_err);
1694 if (local_err) {
1695 g_free(new_block);
1696 error_propagate(errp, local_err);
1697 return NULL;
1699 return new_block;
1702 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1703 MemoryRegion *mr, Error **errp)
1705 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1708 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1710 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1713 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1714 void (*resized)(const char*,
1715 uint64_t length,
1716 void *host),
1717 MemoryRegion *mr, Error **errp)
1719 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1722 static void reclaim_ramblock(RAMBlock *block)
1724 if (block->flags & RAM_PREALLOC) {
1726 } else if (xen_enabled()) {
1727 xen_invalidate_map_cache_entry(block->host);
1728 #ifndef _WIN32
1729 } else if (block->fd >= 0) {
1730 qemu_ram_munmap(block->host, block->max_length);
1731 close(block->fd);
1732 #endif
1733 } else {
1734 qemu_anon_ram_free(block->host, block->max_length);
1736 g_free(block);
1739 void qemu_ram_free(RAMBlock *block)
1741 if (!block) {
1742 return;
1745 qemu_mutex_lock_ramlist();
1746 QLIST_REMOVE_RCU(block, next);
1747 ram_list.mru_block = NULL;
1748 /* Write list before version */
1749 smp_wmb();
1750 ram_list.version++;
1751 call_rcu(block, reclaim_ramblock, rcu);
1752 qemu_mutex_unlock_ramlist();
1755 #ifndef _WIN32
1756 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1758 RAMBlock *block;
1759 ram_addr_t offset;
1760 int flags;
1761 void *area, *vaddr;
1763 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1764 offset = addr - block->offset;
1765 if (offset < block->max_length) {
1766 vaddr = ramblock_ptr(block, offset);
1767 if (block->flags & RAM_PREALLOC) {
1769 } else if (xen_enabled()) {
1770 abort();
1771 } else {
1772 flags = MAP_FIXED;
1773 if (block->fd >= 0) {
1774 flags |= (block->flags & RAM_SHARED ?
1775 MAP_SHARED : MAP_PRIVATE);
1776 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1777 flags, block->fd, offset);
1778 } else {
1780 * Remap needs to match alloc. Accelerators that
1781 * set phys_mem_alloc never remap. If they did,
1782 * we'd need a remap hook here.
1784 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1786 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1787 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1788 flags, -1, 0);
1790 if (area != vaddr) {
1791 fprintf(stderr, "Could not remap addr: "
1792 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1793 length, addr);
1794 exit(1);
1796 memory_try_enable_merging(vaddr, length);
1797 qemu_ram_setup_dump(vaddr, length);
1802 #endif /* !_WIN32 */
1804 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1805 * This should not be used for general purpose DMA. Use address_space_map
1806 * or address_space_rw instead. For local memory (e.g. video ram) that the
1807 * device owns, use memory_region_get_ram_ptr.
1809 * Called within RCU critical section.
1811 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1813 RAMBlock *block = ram_block;
1815 if (block == NULL) {
1816 block = qemu_get_ram_block(addr);
1817 addr -= block->offset;
1820 if (xen_enabled() && block->host == NULL) {
1821 /* We need to check if the requested address is in the RAM
1822 * because we don't want to map the entire memory in QEMU.
1823 * In that case just map until the end of the page.
1825 if (block->offset == 0) {
1826 return xen_map_cache(addr, 0, 0);
1829 block->host = xen_map_cache(block->offset, block->max_length, 1);
1831 return ramblock_ptr(block, addr);
1834 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1835 * but takes a size argument.
1837 * Called within RCU critical section.
1839 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1840 hwaddr *size)
1842 RAMBlock *block = ram_block;
1843 if (*size == 0) {
1844 return NULL;
1847 if (block == NULL) {
1848 block = qemu_get_ram_block(addr);
1849 addr -= block->offset;
1851 *size = MIN(*size, block->max_length - addr);
1853 if (xen_enabled() && block->host == NULL) {
1854 /* We need to check if the requested address is in the RAM
1855 * because we don't want to map the entire memory in QEMU.
1856 * In that case just map the requested area.
1858 if (block->offset == 0) {
1859 return xen_map_cache(addr, *size, 1);
1862 block->host = xen_map_cache(block->offset, block->max_length, 1);
1865 return ramblock_ptr(block, addr);
1869 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1870 * in that RAMBlock.
1872 * ptr: Host pointer to look up
1873 * round_offset: If true round the result offset down to a page boundary
1874 * *ram_addr: set to result ram_addr
1875 * *offset: set to result offset within the RAMBlock
1877 * Returns: RAMBlock (or NULL if not found)
1879 * By the time this function returns, the returned pointer is not protected
1880 * by RCU anymore. If the caller is not within an RCU critical section and
1881 * does not hold the iothread lock, it must have other means of protecting the
1882 * pointer, such as a reference to the region that includes the incoming
1883 * ram_addr_t.
1885 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1886 ram_addr_t *offset)
1888 RAMBlock *block;
1889 uint8_t *host = ptr;
1891 if (xen_enabled()) {
1892 ram_addr_t ram_addr;
1893 rcu_read_lock();
1894 ram_addr = xen_ram_addr_from_mapcache(ptr);
1895 block = qemu_get_ram_block(ram_addr);
1896 if (block) {
1897 *offset = ram_addr - block->offset;
1899 rcu_read_unlock();
1900 return block;
1903 rcu_read_lock();
1904 block = atomic_rcu_read(&ram_list.mru_block);
1905 if (block && block->host && host - block->host < block->max_length) {
1906 goto found;
1909 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1910 /* This case append when the block is not mapped. */
1911 if (block->host == NULL) {
1912 continue;
1914 if (host - block->host < block->max_length) {
1915 goto found;
1919 rcu_read_unlock();
1920 return NULL;
1922 found:
1923 *offset = (host - block->host);
1924 if (round_offset) {
1925 *offset &= TARGET_PAGE_MASK;
1927 rcu_read_unlock();
1928 return block;
1932 * Finds the named RAMBlock
1934 * name: The name of RAMBlock to find
1936 * Returns: RAMBlock (or NULL if not found)
1938 RAMBlock *qemu_ram_block_by_name(const char *name)
1940 RAMBlock *block;
1942 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1943 if (!strcmp(name, block->idstr)) {
1944 return block;
1948 return NULL;
1951 /* Some of the softmmu routines need to translate from a host pointer
1952 (typically a TLB entry) back to a ram offset. */
1953 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1955 RAMBlock *block;
1956 ram_addr_t offset;
1958 block = qemu_ram_block_from_host(ptr, false, &offset);
1959 if (!block) {
1960 return RAM_ADDR_INVALID;
1963 return block->offset + offset;
1966 /* Called within RCU critical section. */
1967 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1968 uint64_t val, unsigned size)
1970 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1971 tb_invalidate_phys_page_fast(ram_addr, size);
1973 switch (size) {
1974 case 1:
1975 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1976 break;
1977 case 2:
1978 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1979 break;
1980 case 4:
1981 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1982 break;
1983 default:
1984 abort();
1986 /* Set both VGA and migration bits for simplicity and to remove
1987 * the notdirty callback faster.
1989 cpu_physical_memory_set_dirty_range(ram_addr, size,
1990 DIRTY_CLIENTS_NOCODE);
1991 /* we remove the notdirty callback only if the code has been
1992 flushed */
1993 if (!cpu_physical_memory_is_clean(ram_addr)) {
1994 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
1998 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1999 unsigned size, bool is_write)
2001 return is_write;
2004 static const MemoryRegionOps notdirty_mem_ops = {
2005 .write = notdirty_mem_write,
2006 .valid.accepts = notdirty_mem_accepts,
2007 .endianness = DEVICE_NATIVE_ENDIAN,
2010 /* Generate a debug exception if a watchpoint has been hit. */
2011 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2013 CPUState *cpu = current_cpu;
2014 CPUClass *cc = CPU_GET_CLASS(cpu);
2015 CPUArchState *env = cpu->env_ptr;
2016 target_ulong pc, cs_base;
2017 target_ulong vaddr;
2018 CPUWatchpoint *wp;
2019 uint32_t cpu_flags;
2021 if (cpu->watchpoint_hit) {
2022 /* We re-entered the check after replacing the TB. Now raise
2023 * the debug interrupt so that is will trigger after the
2024 * current instruction. */
2025 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2026 return;
2028 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2029 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2030 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2031 && (wp->flags & flags)) {
2032 if (flags == BP_MEM_READ) {
2033 wp->flags |= BP_WATCHPOINT_HIT_READ;
2034 } else {
2035 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2037 wp->hitaddr = vaddr;
2038 wp->hitattrs = attrs;
2039 if (!cpu->watchpoint_hit) {
2040 if (wp->flags & BP_CPU &&
2041 !cc->debug_check_watchpoint(cpu, wp)) {
2042 wp->flags &= ~BP_WATCHPOINT_HIT;
2043 continue;
2045 cpu->watchpoint_hit = wp;
2046 tb_check_watchpoint(cpu);
2047 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2048 cpu->exception_index = EXCP_DEBUG;
2049 cpu_loop_exit(cpu);
2050 } else {
2051 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2052 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2053 cpu_loop_exit_noexc(cpu);
2056 } else {
2057 wp->flags &= ~BP_WATCHPOINT_HIT;
2062 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2063 so these check for a hit then pass through to the normal out-of-line
2064 phys routines. */
2065 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2066 unsigned size, MemTxAttrs attrs)
2068 MemTxResult res;
2069 uint64_t data;
2070 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2071 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2073 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2074 switch (size) {
2075 case 1:
2076 data = address_space_ldub(as, addr, attrs, &res);
2077 break;
2078 case 2:
2079 data = address_space_lduw(as, addr, attrs, &res);
2080 break;
2081 case 4:
2082 data = address_space_ldl(as, addr, attrs, &res);
2083 break;
2084 default: abort();
2086 *pdata = data;
2087 return res;
2090 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2091 uint64_t val, unsigned size,
2092 MemTxAttrs attrs)
2094 MemTxResult res;
2095 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2096 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2098 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2099 switch (size) {
2100 case 1:
2101 address_space_stb(as, addr, val, attrs, &res);
2102 break;
2103 case 2:
2104 address_space_stw(as, addr, val, attrs, &res);
2105 break;
2106 case 4:
2107 address_space_stl(as, addr, val, attrs, &res);
2108 break;
2109 default: abort();
2111 return res;
2114 static const MemoryRegionOps watch_mem_ops = {
2115 .read_with_attrs = watch_mem_read,
2116 .write_with_attrs = watch_mem_write,
2117 .endianness = DEVICE_NATIVE_ENDIAN,
2120 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2121 unsigned len, MemTxAttrs attrs)
2123 subpage_t *subpage = opaque;
2124 uint8_t buf[8];
2125 MemTxResult res;
2127 #if defined(DEBUG_SUBPAGE)
2128 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2129 subpage, len, addr);
2130 #endif
2131 res = address_space_read(subpage->as, addr + subpage->base,
2132 attrs, buf, len);
2133 if (res) {
2134 return res;
2136 switch (len) {
2137 case 1:
2138 *data = ldub_p(buf);
2139 return MEMTX_OK;
2140 case 2:
2141 *data = lduw_p(buf);
2142 return MEMTX_OK;
2143 case 4:
2144 *data = ldl_p(buf);
2145 return MEMTX_OK;
2146 case 8:
2147 *data = ldq_p(buf);
2148 return MEMTX_OK;
2149 default:
2150 abort();
2154 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2155 uint64_t value, unsigned len, MemTxAttrs attrs)
2157 subpage_t *subpage = opaque;
2158 uint8_t buf[8];
2160 #if defined(DEBUG_SUBPAGE)
2161 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2162 " value %"PRIx64"\n",
2163 __func__, subpage, len, addr, value);
2164 #endif
2165 switch (len) {
2166 case 1:
2167 stb_p(buf, value);
2168 break;
2169 case 2:
2170 stw_p(buf, value);
2171 break;
2172 case 4:
2173 stl_p(buf, value);
2174 break;
2175 case 8:
2176 stq_p(buf, value);
2177 break;
2178 default:
2179 abort();
2181 return address_space_write(subpage->as, addr + subpage->base,
2182 attrs, buf, len);
2185 static bool subpage_accepts(void *opaque, hwaddr addr,
2186 unsigned len, bool is_write)
2188 subpage_t *subpage = opaque;
2189 #if defined(DEBUG_SUBPAGE)
2190 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2191 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2192 #endif
2194 return address_space_access_valid(subpage->as, addr + subpage->base,
2195 len, is_write);
2198 static const MemoryRegionOps subpage_ops = {
2199 .read_with_attrs = subpage_read,
2200 .write_with_attrs = subpage_write,
2201 .impl.min_access_size = 1,
2202 .impl.max_access_size = 8,
2203 .valid.min_access_size = 1,
2204 .valid.max_access_size = 8,
2205 .valid.accepts = subpage_accepts,
2206 .endianness = DEVICE_NATIVE_ENDIAN,
2209 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2210 uint16_t section)
2212 int idx, eidx;
2214 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2215 return -1;
2216 idx = SUBPAGE_IDX(start);
2217 eidx = SUBPAGE_IDX(end);
2218 #if defined(DEBUG_SUBPAGE)
2219 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2220 __func__, mmio, start, end, idx, eidx, section);
2221 #endif
2222 for (; idx <= eidx; idx++) {
2223 mmio->sub_section[idx] = section;
2226 return 0;
2229 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2231 subpage_t *mmio;
2233 mmio = g_malloc0(sizeof(subpage_t));
2235 mmio->as = as;
2236 mmio->base = base;
2237 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2238 NULL, TARGET_PAGE_SIZE);
2239 mmio->iomem.subpage = true;
2240 #if defined(DEBUG_SUBPAGE)
2241 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2242 mmio, base, TARGET_PAGE_SIZE);
2243 #endif
2244 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2246 return mmio;
2249 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2250 MemoryRegion *mr)
2252 assert(as);
2253 MemoryRegionSection section = {
2254 .address_space = as,
2255 .mr = mr,
2256 .offset_within_address_space = 0,
2257 .offset_within_region = 0,
2258 .size = int128_2_64(),
2261 return phys_section_add(map, &section);
2264 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2266 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2267 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2268 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2269 MemoryRegionSection *sections = d->map.sections;
2271 return sections[index & ~TARGET_PAGE_MASK].mr;
2274 static void io_mem_init(void)
2276 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2277 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2278 NULL, UINT64_MAX);
2279 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2280 NULL, UINT64_MAX);
2281 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2282 NULL, UINT64_MAX);
2285 static void mem_begin(MemoryListener *listener)
2287 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2288 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2289 uint16_t n;
2291 n = dummy_section(&d->map, as, &io_mem_unassigned);
2292 assert(n == PHYS_SECTION_UNASSIGNED);
2293 n = dummy_section(&d->map, as, &io_mem_notdirty);
2294 assert(n == PHYS_SECTION_NOTDIRTY);
2295 n = dummy_section(&d->map, as, &io_mem_rom);
2296 assert(n == PHYS_SECTION_ROM);
2297 n = dummy_section(&d->map, as, &io_mem_watch);
2298 assert(n == PHYS_SECTION_WATCH);
2300 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2301 d->as = as;
2302 as->next_dispatch = d;
2305 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2307 phys_sections_free(&d->map);
2308 g_free(d);
2311 static void mem_commit(MemoryListener *listener)
2313 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2314 AddressSpaceDispatch *cur = as->dispatch;
2315 AddressSpaceDispatch *next = as->next_dispatch;
2317 phys_page_compact_all(next, next->map.nodes_nb);
2319 atomic_rcu_set(&as->dispatch, next);
2320 if (cur) {
2321 call_rcu(cur, address_space_dispatch_free, rcu);
2325 static void tcg_commit(MemoryListener *listener)
2327 CPUAddressSpace *cpuas;
2328 AddressSpaceDispatch *d;
2330 /* since each CPU stores ram addresses in its TLB cache, we must
2331 reset the modified entries */
2332 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2333 cpu_reloading_memory_map();
2334 /* The CPU and TLB are protected by the iothread lock.
2335 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2336 * may have split the RCU critical section.
2338 d = atomic_rcu_read(&cpuas->as->dispatch);
2339 cpuas->memory_dispatch = d;
2340 tlb_flush(cpuas->cpu, 1);
2343 void address_space_init_dispatch(AddressSpace *as)
2345 as->dispatch = NULL;
2346 as->dispatch_listener = (MemoryListener) {
2347 .begin = mem_begin,
2348 .commit = mem_commit,
2349 .region_add = mem_add,
2350 .region_nop = mem_add,
2351 .priority = 0,
2353 memory_listener_register(&as->dispatch_listener, as);
2356 void address_space_unregister(AddressSpace *as)
2358 memory_listener_unregister(&as->dispatch_listener);
2361 void address_space_destroy_dispatch(AddressSpace *as)
2363 AddressSpaceDispatch *d = as->dispatch;
2365 atomic_rcu_set(&as->dispatch, NULL);
2366 if (d) {
2367 call_rcu(d, address_space_dispatch_free, rcu);
2371 static void memory_map_init(void)
2373 system_memory = g_malloc(sizeof(*system_memory));
2375 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2376 address_space_init(&address_space_memory, system_memory, "memory");
2378 system_io = g_malloc(sizeof(*system_io));
2379 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2380 65536);
2381 address_space_init(&address_space_io, system_io, "I/O");
2384 MemoryRegion *get_system_memory(void)
2386 return system_memory;
2389 MemoryRegion *get_system_io(void)
2391 return system_io;
2394 #endif /* !defined(CONFIG_USER_ONLY) */
2396 /* physical memory access (slow version, mainly for debug) */
2397 #if defined(CONFIG_USER_ONLY)
2398 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2399 uint8_t *buf, int len, int is_write)
2401 int l, flags;
2402 target_ulong page;
2403 void * p;
2405 while (len > 0) {
2406 page = addr & TARGET_PAGE_MASK;
2407 l = (page + TARGET_PAGE_SIZE) - addr;
2408 if (l > len)
2409 l = len;
2410 flags = page_get_flags(page);
2411 if (!(flags & PAGE_VALID))
2412 return -1;
2413 if (is_write) {
2414 if (!(flags & PAGE_WRITE))
2415 return -1;
2416 /* XXX: this code should not depend on lock_user */
2417 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2418 return -1;
2419 memcpy(p, buf, l);
2420 unlock_user(p, addr, l);
2421 } else {
2422 if (!(flags & PAGE_READ))
2423 return -1;
2424 /* XXX: this code should not depend on lock_user */
2425 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2426 return -1;
2427 memcpy(buf, p, l);
2428 unlock_user(p, addr, 0);
2430 len -= l;
2431 buf += l;
2432 addr += l;
2434 return 0;
2437 #else
2439 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2440 hwaddr length)
2442 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2443 addr += memory_region_get_ram_addr(mr);
2445 /* No early return if dirty_log_mask is or becomes 0, because
2446 * cpu_physical_memory_set_dirty_range will still call
2447 * xen_modified_memory.
2449 if (dirty_log_mask) {
2450 dirty_log_mask =
2451 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2453 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2454 tb_invalidate_phys_range(addr, addr + length);
2455 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2457 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2460 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2462 unsigned access_size_max = mr->ops->valid.max_access_size;
2464 /* Regions are assumed to support 1-4 byte accesses unless
2465 otherwise specified. */
2466 if (access_size_max == 0) {
2467 access_size_max = 4;
2470 /* Bound the maximum access by the alignment of the address. */
2471 if (!mr->ops->impl.unaligned) {
2472 unsigned align_size_max = addr & -addr;
2473 if (align_size_max != 0 && align_size_max < access_size_max) {
2474 access_size_max = align_size_max;
2478 /* Don't attempt accesses larger than the maximum. */
2479 if (l > access_size_max) {
2480 l = access_size_max;
2482 l = pow2floor(l);
2484 return l;
2487 static bool prepare_mmio_access(MemoryRegion *mr)
2489 bool unlocked = !qemu_mutex_iothread_locked();
2490 bool release_lock = false;
2492 if (unlocked && mr->global_locking) {
2493 qemu_mutex_lock_iothread();
2494 unlocked = false;
2495 release_lock = true;
2497 if (mr->flush_coalesced_mmio) {
2498 if (unlocked) {
2499 qemu_mutex_lock_iothread();
2501 qemu_flush_coalesced_mmio_buffer();
2502 if (unlocked) {
2503 qemu_mutex_unlock_iothread();
2507 return release_lock;
2510 /* Called within RCU critical section. */
2511 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2512 MemTxAttrs attrs,
2513 const uint8_t *buf,
2514 int len, hwaddr addr1,
2515 hwaddr l, MemoryRegion *mr)
2517 uint8_t *ptr;
2518 uint64_t val;
2519 MemTxResult result = MEMTX_OK;
2520 bool release_lock = false;
2522 for (;;) {
2523 if (!memory_access_is_direct(mr, true)) {
2524 release_lock |= prepare_mmio_access(mr);
2525 l = memory_access_size(mr, l, addr1);
2526 /* XXX: could force current_cpu to NULL to avoid
2527 potential bugs */
2528 switch (l) {
2529 case 8:
2530 /* 64 bit write access */
2531 val = ldq_p(buf);
2532 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2533 attrs);
2534 break;
2535 case 4:
2536 /* 32 bit write access */
2537 val = ldl_p(buf);
2538 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2539 attrs);
2540 break;
2541 case 2:
2542 /* 16 bit write access */
2543 val = lduw_p(buf);
2544 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2545 attrs);
2546 break;
2547 case 1:
2548 /* 8 bit write access */
2549 val = ldub_p(buf);
2550 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2551 attrs);
2552 break;
2553 default:
2554 abort();
2556 } else {
2557 /* RAM case */
2558 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2559 memcpy(ptr, buf, l);
2560 invalidate_and_set_dirty(mr, addr1, l);
2563 if (release_lock) {
2564 qemu_mutex_unlock_iothread();
2565 release_lock = false;
2568 len -= l;
2569 buf += l;
2570 addr += l;
2572 if (!len) {
2573 break;
2576 l = len;
2577 mr = address_space_translate(as, addr, &addr1, &l, true);
2580 return result;
2583 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2584 const uint8_t *buf, int len)
2586 hwaddr l;
2587 hwaddr addr1;
2588 MemoryRegion *mr;
2589 MemTxResult result = MEMTX_OK;
2591 if (len > 0) {
2592 rcu_read_lock();
2593 l = len;
2594 mr = address_space_translate(as, addr, &addr1, &l, true);
2595 result = address_space_write_continue(as, addr, attrs, buf, len,
2596 addr1, l, mr);
2597 rcu_read_unlock();
2600 return result;
2603 /* Called within RCU critical section. */
2604 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2605 MemTxAttrs attrs, uint8_t *buf,
2606 int len, hwaddr addr1, hwaddr l,
2607 MemoryRegion *mr)
2609 uint8_t *ptr;
2610 uint64_t val;
2611 MemTxResult result = MEMTX_OK;
2612 bool release_lock = false;
2614 for (;;) {
2615 if (!memory_access_is_direct(mr, false)) {
2616 /* I/O case */
2617 release_lock |= prepare_mmio_access(mr);
2618 l = memory_access_size(mr, l, addr1);
2619 switch (l) {
2620 case 8:
2621 /* 64 bit read access */
2622 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2623 attrs);
2624 stq_p(buf, val);
2625 break;
2626 case 4:
2627 /* 32 bit read access */
2628 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2629 attrs);
2630 stl_p(buf, val);
2631 break;
2632 case 2:
2633 /* 16 bit read access */
2634 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2635 attrs);
2636 stw_p(buf, val);
2637 break;
2638 case 1:
2639 /* 8 bit read access */
2640 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2641 attrs);
2642 stb_p(buf, val);
2643 break;
2644 default:
2645 abort();
2647 } else {
2648 /* RAM case */
2649 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2650 memcpy(buf, ptr, l);
2653 if (release_lock) {
2654 qemu_mutex_unlock_iothread();
2655 release_lock = false;
2658 len -= l;
2659 buf += l;
2660 addr += l;
2662 if (!len) {
2663 break;
2666 l = len;
2667 mr = address_space_translate(as, addr, &addr1, &l, false);
2670 return result;
2673 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2674 MemTxAttrs attrs, uint8_t *buf, int len)
2676 hwaddr l;
2677 hwaddr addr1;
2678 MemoryRegion *mr;
2679 MemTxResult result = MEMTX_OK;
2681 if (len > 0) {
2682 rcu_read_lock();
2683 l = len;
2684 mr = address_space_translate(as, addr, &addr1, &l, false);
2685 result = address_space_read_continue(as, addr, attrs, buf, len,
2686 addr1, l, mr);
2687 rcu_read_unlock();
2690 return result;
2693 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2694 uint8_t *buf, int len, bool is_write)
2696 if (is_write) {
2697 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2698 } else {
2699 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2703 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2704 int len, int is_write)
2706 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2707 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into guest RAM/ROM */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2715 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2716 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2718 hwaddr l;
2719 uint8_t *ptr;
2720 hwaddr addr1;
2721 MemoryRegion *mr;
2723 rcu_read_lock();
2724 while (len > 0) {
2725 l = len;
2726 mr = address_space_translate(as, addr, &addr1, &l, true);
2728 if (!(memory_region_is_ram(mr) ||
2729 memory_region_is_romd(mr))) {
2730 l = memory_access_size(mr, l, addr1);
2731 } else {
2732 /* ROM/RAM case */
2733 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2734 switch (type) {
2735 case WRITE_DATA:
2736 memcpy(ptr, buf, l);
2737 invalidate_and_set_dirty(mr, addr1, l);
2738 break;
2739 case FLUSH_CACHE:
2740 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2741 break;
2744 len -= l;
2745 buf += l;
2746 addr += l;
2748 rcu_read_unlock();
2751 /* used for ROM loading : can write in RAM and ROM */
2752 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2753 const uint8_t *buf, int len)
2755 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2758 void cpu_flush_icache_range(hwaddr start, int len)
2761 * This function should do the same thing as an icache flush that was
2762 * triggered from within the guest. For TCG we are always cache coherent,
2763 * so there is no need to flush anything. For KVM / Xen we need to flush
2764 * the host's instruction cache at least.
2766 if (tcg_enabled()) {
2767 return;
2770 cpu_physical_memory_write_rom_internal(&address_space_memory,
2771 start, NULL, len, FLUSH_CACHE);
2774 typedef struct {
2775 MemoryRegion *mr;
2776 void *buffer;
2777 hwaddr addr;
2778 hwaddr len;
2779 bool in_use;
2780 } BounceBuffer;
2782 static BounceBuffer bounce;
2784 typedef struct MapClient {
2785 QEMUBH *bh;
2786 QLIST_ENTRY(MapClient) link;
2787 } MapClient;
2789 QemuMutex map_client_list_lock;
2790 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2791 = QLIST_HEAD_INITIALIZER(map_client_list);
2793 static void cpu_unregister_map_client_do(MapClient *client)
2795 QLIST_REMOVE(client, link);
2796 g_free(client);
2799 static void cpu_notify_map_clients_locked(void)
2801 MapClient *client;
2803 while (!QLIST_EMPTY(&map_client_list)) {
2804 client = QLIST_FIRST(&map_client_list);
2805 qemu_bh_schedule(client->bh);
2806 cpu_unregister_map_client_do(client);
2810 void cpu_register_map_client(QEMUBH *bh)
2812 MapClient *client = g_malloc(sizeof(*client));
2814 qemu_mutex_lock(&map_client_list_lock);
2815 client->bh = bh;
2816 QLIST_INSERT_HEAD(&map_client_list, client, link);
2817 if (!atomic_read(&bounce.in_use)) {
2818 cpu_notify_map_clients_locked();
2820 qemu_mutex_unlock(&map_client_list_lock);
2823 void cpu_exec_init_all(void)
2825 qemu_mutex_init(&ram_list.mutex);
2826 io_mem_init();
2827 memory_map_init();
2828 qemu_mutex_init(&map_client_list_lock);
2831 void cpu_unregister_map_client(QEMUBH *bh)
2833 MapClient *client;
2835 qemu_mutex_lock(&map_client_list_lock);
2836 QLIST_FOREACH(client, &map_client_list, link) {
2837 if (client->bh == bh) {
2838 cpu_unregister_map_client_do(client);
2839 break;
2842 qemu_mutex_unlock(&map_client_list_lock);
2845 static void cpu_notify_map_clients(void)
2847 qemu_mutex_lock(&map_client_list_lock);
2848 cpu_notify_map_clients_locked();
2849 qemu_mutex_unlock(&map_client_list_lock);
2852 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2854 MemoryRegion *mr;
2855 hwaddr l, xlat;
2857 rcu_read_lock();
2858 while (len > 0) {
2859 l = len;
2860 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2861 if (!memory_access_is_direct(mr, is_write)) {
2862 l = memory_access_size(mr, l, addr);
2863 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2864 return false;
2868 len -= l;
2869 addr += l;
2871 rcu_read_unlock();
2872 return true;
2875 /* Map a physical memory region into a host virtual address.
2876 * May map a subset of the requested range, given by and returned in *plen.
2877 * May return NULL if resources needed to perform the mapping are exhausted.
2878 * Use only for reads OR writes - not for read-modify-write operations.
2879 * Use cpu_register_map_client() to know when retrying the map operation is
2880 * likely to succeed.
2882 void *address_space_map(AddressSpace *as,
2883 hwaddr addr,
2884 hwaddr *plen,
2885 bool is_write)
2887 hwaddr len = *plen;
2888 hwaddr done = 0;
2889 hwaddr l, xlat, base;
2890 MemoryRegion *mr, *this_mr;
2891 void *ptr;
2893 if (len == 0) {
2894 return NULL;
2897 l = len;
2898 rcu_read_lock();
2899 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2901 if (!memory_access_is_direct(mr, is_write)) {
2902 if (atomic_xchg(&bounce.in_use, true)) {
2903 rcu_read_unlock();
2904 return NULL;
2906 /* Avoid unbounded allocations */
2907 l = MIN(l, TARGET_PAGE_SIZE);
2908 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2909 bounce.addr = addr;
2910 bounce.len = l;
2912 memory_region_ref(mr);
2913 bounce.mr = mr;
2914 if (!is_write) {
2915 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2916 bounce.buffer, l);
2919 rcu_read_unlock();
2920 *plen = l;
2921 return bounce.buffer;
2924 base = xlat;
2926 for (;;) {
2927 len -= l;
2928 addr += l;
2929 done += l;
2930 if (len == 0) {
2931 break;
2934 l = len;
2935 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2936 if (this_mr != mr || xlat != base + done) {
2937 break;
2941 memory_region_ref(mr);
2942 *plen = done;
2943 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2944 rcu_read_unlock();
2946 return ptr;
2949 /* Unmaps a memory region previously mapped by address_space_map().
2950 * Will also mark the memory as dirty if is_write == 1. access_len gives
2951 * the amount of memory that was actually read or written by the caller.
2953 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2954 int is_write, hwaddr access_len)
2956 if (buffer != bounce.buffer) {
2957 MemoryRegion *mr;
2958 ram_addr_t addr1;
2960 mr = memory_region_from_host(buffer, &addr1);
2961 assert(mr != NULL);
2962 if (is_write) {
2963 invalidate_and_set_dirty(mr, addr1, access_len);
2965 if (xen_enabled()) {
2966 xen_invalidate_map_cache_entry(buffer);
2968 memory_region_unref(mr);
2969 return;
2971 if (is_write) {
2972 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2973 bounce.buffer, access_len);
2975 qemu_vfree(bounce.buffer);
2976 bounce.buffer = NULL;
2977 memory_region_unref(bounce.mr);
2978 atomic_mb_set(&bounce.in_use, false);
2979 cpu_notify_map_clients();
2982 void *cpu_physical_memory_map(hwaddr addr,
2983 hwaddr *plen,
2984 int is_write)
2986 return address_space_map(&address_space_memory, addr, plen, is_write);
2989 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2990 int is_write, hwaddr access_len)
2992 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2995 /* warning: addr must be aligned */
2996 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2997 MemTxAttrs attrs,
2998 MemTxResult *result,
2999 enum device_endian endian)
3001 uint8_t *ptr;
3002 uint64_t val;
3003 MemoryRegion *mr;
3004 hwaddr l = 4;
3005 hwaddr addr1;
3006 MemTxResult r;
3007 bool release_lock = false;
3009 rcu_read_lock();
3010 mr = address_space_translate(as, addr, &addr1, &l, false);
3011 if (l < 4 || !memory_access_is_direct(mr, false)) {
3012 release_lock |= prepare_mmio_access(mr);
3014 /* I/O case */
3015 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3016 #if defined(TARGET_WORDS_BIGENDIAN)
3017 if (endian == DEVICE_LITTLE_ENDIAN) {
3018 val = bswap32(val);
3020 #else
3021 if (endian == DEVICE_BIG_ENDIAN) {
3022 val = bswap32(val);
3024 #endif
3025 } else {
3026 /* RAM case */
3027 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3028 switch (endian) {
3029 case DEVICE_LITTLE_ENDIAN:
3030 val = ldl_le_p(ptr);
3031 break;
3032 case DEVICE_BIG_ENDIAN:
3033 val = ldl_be_p(ptr);
3034 break;
3035 default:
3036 val = ldl_p(ptr);
3037 break;
3039 r = MEMTX_OK;
3041 if (result) {
3042 *result = r;
3044 if (release_lock) {
3045 qemu_mutex_unlock_iothread();
3047 rcu_read_unlock();
3048 return val;
3051 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3052 MemTxAttrs attrs, MemTxResult *result)
3054 return address_space_ldl_internal(as, addr, attrs, result,
3055 DEVICE_NATIVE_ENDIAN);
3058 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3059 MemTxAttrs attrs, MemTxResult *result)
3061 return address_space_ldl_internal(as, addr, attrs, result,
3062 DEVICE_LITTLE_ENDIAN);
3065 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3066 MemTxAttrs attrs, MemTxResult *result)
3068 return address_space_ldl_internal(as, addr, attrs, result,
3069 DEVICE_BIG_ENDIAN);
3072 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3074 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3077 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3079 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3082 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3084 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3087 /* warning: addr must be aligned */
3088 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3089 MemTxAttrs attrs,
3090 MemTxResult *result,
3091 enum device_endian endian)
3093 uint8_t *ptr;
3094 uint64_t val;
3095 MemoryRegion *mr;
3096 hwaddr l = 8;
3097 hwaddr addr1;
3098 MemTxResult r;
3099 bool release_lock = false;
3101 rcu_read_lock();
3102 mr = address_space_translate(as, addr, &addr1, &l,
3103 false);
3104 if (l < 8 || !memory_access_is_direct(mr, false)) {
3105 release_lock |= prepare_mmio_access(mr);
3107 /* I/O case */
3108 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3109 #if defined(TARGET_WORDS_BIGENDIAN)
3110 if (endian == DEVICE_LITTLE_ENDIAN) {
3111 val = bswap64(val);
3113 #else
3114 if (endian == DEVICE_BIG_ENDIAN) {
3115 val = bswap64(val);
3117 #endif
3118 } else {
3119 /* RAM case */
3120 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3121 switch (endian) {
3122 case DEVICE_LITTLE_ENDIAN:
3123 val = ldq_le_p(ptr);
3124 break;
3125 case DEVICE_BIG_ENDIAN:
3126 val = ldq_be_p(ptr);
3127 break;
3128 default:
3129 val = ldq_p(ptr);
3130 break;
3132 r = MEMTX_OK;
3134 if (result) {
3135 *result = r;
3137 if (release_lock) {
3138 qemu_mutex_unlock_iothread();
3140 rcu_read_unlock();
3141 return val;
3144 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3145 MemTxAttrs attrs, MemTxResult *result)
3147 return address_space_ldq_internal(as, addr, attrs, result,
3148 DEVICE_NATIVE_ENDIAN);
3151 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3152 MemTxAttrs attrs, MemTxResult *result)
3154 return address_space_ldq_internal(as, addr, attrs, result,
3155 DEVICE_LITTLE_ENDIAN);
3158 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3159 MemTxAttrs attrs, MemTxResult *result)
3161 return address_space_ldq_internal(as, addr, attrs, result,
3162 DEVICE_BIG_ENDIAN);
3165 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3167 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3170 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3172 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3175 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3177 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3180 /* XXX: optimize */
3181 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3182 MemTxAttrs attrs, MemTxResult *result)
3184 uint8_t val;
3185 MemTxResult r;
3187 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3188 if (result) {
3189 *result = r;
3191 return val;
3194 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3196 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3199 /* warning: addr must be aligned */
3200 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3201 hwaddr addr,
3202 MemTxAttrs attrs,
3203 MemTxResult *result,
3204 enum device_endian endian)
3206 uint8_t *ptr;
3207 uint64_t val;
3208 MemoryRegion *mr;
3209 hwaddr l = 2;
3210 hwaddr addr1;
3211 MemTxResult r;
3212 bool release_lock = false;
3214 rcu_read_lock();
3215 mr = address_space_translate(as, addr, &addr1, &l,
3216 false);
3217 if (l < 2 || !memory_access_is_direct(mr, false)) {
3218 release_lock |= prepare_mmio_access(mr);
3220 /* I/O case */
3221 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3222 #if defined(TARGET_WORDS_BIGENDIAN)
3223 if (endian == DEVICE_LITTLE_ENDIAN) {
3224 val = bswap16(val);
3226 #else
3227 if (endian == DEVICE_BIG_ENDIAN) {
3228 val = bswap16(val);
3230 #endif
3231 } else {
3232 /* RAM case */
3233 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3234 switch (endian) {
3235 case DEVICE_LITTLE_ENDIAN:
3236 val = lduw_le_p(ptr);
3237 break;
3238 case DEVICE_BIG_ENDIAN:
3239 val = lduw_be_p(ptr);
3240 break;
3241 default:
3242 val = lduw_p(ptr);
3243 break;
3245 r = MEMTX_OK;
3247 if (result) {
3248 *result = r;
3250 if (release_lock) {
3251 qemu_mutex_unlock_iothread();
3253 rcu_read_unlock();
3254 return val;
3257 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3258 MemTxAttrs attrs, MemTxResult *result)
3260 return address_space_lduw_internal(as, addr, attrs, result,
3261 DEVICE_NATIVE_ENDIAN);
3264 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3265 MemTxAttrs attrs, MemTxResult *result)
3267 return address_space_lduw_internal(as, addr, attrs, result,
3268 DEVICE_LITTLE_ENDIAN);
3271 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3272 MemTxAttrs attrs, MemTxResult *result)
3274 return address_space_lduw_internal(as, addr, attrs, result,
3275 DEVICE_BIG_ENDIAN);
3278 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3280 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3283 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3285 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3288 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3290 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3293 /* warning: addr must be aligned. The ram page is not masked as dirty
3294 and the code inside is not invalidated. It is useful if the dirty
3295 bits are used to track modified PTEs */
3296 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3297 MemTxAttrs attrs, MemTxResult *result)
3299 uint8_t *ptr;
3300 MemoryRegion *mr;
3301 hwaddr l = 4;
3302 hwaddr addr1;
3303 MemTxResult r;
3304 uint8_t dirty_log_mask;
3305 bool release_lock = false;
3307 rcu_read_lock();
3308 mr = address_space_translate(as, addr, &addr1, &l,
3309 true);
3310 if (l < 4 || !memory_access_is_direct(mr, true)) {
3311 release_lock |= prepare_mmio_access(mr);
3313 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3314 } else {
3315 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3316 stl_p(ptr, val);
3318 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3319 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3320 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3321 4, dirty_log_mask);
3322 r = MEMTX_OK;
3324 if (result) {
3325 *result = r;
3327 if (release_lock) {
3328 qemu_mutex_unlock_iothread();
3330 rcu_read_unlock();
3333 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3335 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3338 /* warning: addr must be aligned */
3339 static inline void address_space_stl_internal(AddressSpace *as,
3340 hwaddr addr, uint32_t val,
3341 MemTxAttrs attrs,
3342 MemTxResult *result,
3343 enum device_endian endian)
3345 uint8_t *ptr;
3346 MemoryRegion *mr;
3347 hwaddr l = 4;
3348 hwaddr addr1;
3349 MemTxResult r;
3350 bool release_lock = false;
3352 rcu_read_lock();
3353 mr = address_space_translate(as, addr, &addr1, &l,
3354 true);
3355 if (l < 4 || !memory_access_is_direct(mr, true)) {
3356 release_lock |= prepare_mmio_access(mr);
3358 #if defined(TARGET_WORDS_BIGENDIAN)
3359 if (endian == DEVICE_LITTLE_ENDIAN) {
3360 val = bswap32(val);
3362 #else
3363 if (endian == DEVICE_BIG_ENDIAN) {
3364 val = bswap32(val);
3366 #endif
3367 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3368 } else {
3369 /* RAM case */
3370 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3371 switch (endian) {
3372 case DEVICE_LITTLE_ENDIAN:
3373 stl_le_p(ptr, val);
3374 break;
3375 case DEVICE_BIG_ENDIAN:
3376 stl_be_p(ptr, val);
3377 break;
3378 default:
3379 stl_p(ptr, val);
3380 break;
3382 invalidate_and_set_dirty(mr, addr1, 4);
3383 r = MEMTX_OK;
3385 if (result) {
3386 *result = r;
3388 if (release_lock) {
3389 qemu_mutex_unlock_iothread();
3391 rcu_read_unlock();
3394 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3395 MemTxAttrs attrs, MemTxResult *result)
3397 address_space_stl_internal(as, addr, val, attrs, result,
3398 DEVICE_NATIVE_ENDIAN);
3401 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3402 MemTxAttrs attrs, MemTxResult *result)
3404 address_space_stl_internal(as, addr, val, attrs, result,
3405 DEVICE_LITTLE_ENDIAN);
3408 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3409 MemTxAttrs attrs, MemTxResult *result)
3411 address_space_stl_internal(as, addr, val, attrs, result,
3412 DEVICE_BIG_ENDIAN);
3415 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3417 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3420 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3422 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3425 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3427 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3430 /* XXX: optimize */
3431 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3432 MemTxAttrs attrs, MemTxResult *result)
3434 uint8_t v = val;
3435 MemTxResult r;
3437 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3438 if (result) {
3439 *result = r;
3443 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3445 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3448 /* warning: addr must be aligned */
3449 static inline void address_space_stw_internal(AddressSpace *as,
3450 hwaddr addr, uint32_t val,
3451 MemTxAttrs attrs,
3452 MemTxResult *result,
3453 enum device_endian endian)
3455 uint8_t *ptr;
3456 MemoryRegion *mr;
3457 hwaddr l = 2;
3458 hwaddr addr1;
3459 MemTxResult r;
3460 bool release_lock = false;
3462 rcu_read_lock();
3463 mr = address_space_translate(as, addr, &addr1, &l, true);
3464 if (l < 2 || !memory_access_is_direct(mr, true)) {
3465 release_lock |= prepare_mmio_access(mr);
3467 #if defined(TARGET_WORDS_BIGENDIAN)
3468 if (endian == DEVICE_LITTLE_ENDIAN) {
3469 val = bswap16(val);
3471 #else
3472 if (endian == DEVICE_BIG_ENDIAN) {
3473 val = bswap16(val);
3475 #endif
3476 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3477 } else {
3478 /* RAM case */
3479 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3480 switch (endian) {
3481 case DEVICE_LITTLE_ENDIAN:
3482 stw_le_p(ptr, val);
3483 break;
3484 case DEVICE_BIG_ENDIAN:
3485 stw_be_p(ptr, val);
3486 break;
3487 default:
3488 stw_p(ptr, val);
3489 break;
3491 invalidate_and_set_dirty(mr, addr1, 2);
3492 r = MEMTX_OK;
3494 if (result) {
3495 *result = r;
3497 if (release_lock) {
3498 qemu_mutex_unlock_iothread();
3500 rcu_read_unlock();
3503 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3504 MemTxAttrs attrs, MemTxResult *result)
3506 address_space_stw_internal(as, addr, val, attrs, result,
3507 DEVICE_NATIVE_ENDIAN);
3510 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3511 MemTxAttrs attrs, MemTxResult *result)
3513 address_space_stw_internal(as, addr, val, attrs, result,
3514 DEVICE_LITTLE_ENDIAN);
3517 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3518 MemTxAttrs attrs, MemTxResult *result)
3520 address_space_stw_internal(as, addr, val, attrs, result,
3521 DEVICE_BIG_ENDIAN);
3524 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3526 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3529 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3531 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3534 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3536 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3539 /* XXX: optimize */
3540 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3541 MemTxAttrs attrs, MemTxResult *result)
3543 MemTxResult r;
3544 val = tswap64(val);
3545 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3546 if (result) {
3547 *result = r;
3551 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3552 MemTxAttrs attrs, MemTxResult *result)
3554 MemTxResult r;
3555 val = cpu_to_le64(val);
3556 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3557 if (result) {
3558 *result = r;
3561 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3562 MemTxAttrs attrs, MemTxResult *result)
3564 MemTxResult r;
3565 val = cpu_to_be64(val);
3566 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3567 if (result) {
3568 *result = r;
3572 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3574 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3577 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3579 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3582 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3584 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3587 /* virtual memory access for debug (includes writing to ROM) */
3588 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3589 uint8_t *buf, int len, int is_write)
3591 int l;
3592 hwaddr phys_addr;
3593 target_ulong page;
3595 while (len > 0) {
3596 int asidx;
3597 MemTxAttrs attrs;
3599 page = addr & TARGET_PAGE_MASK;
3600 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3601 asidx = cpu_asidx_from_attrs(cpu, attrs);
3602 /* if no physical page mapped, return an error */
3603 if (phys_addr == -1)
3604 return -1;
3605 l = (page + TARGET_PAGE_SIZE) - addr;
3606 if (l > len)
3607 l = len;
3608 phys_addr += (addr & ~TARGET_PAGE_MASK);
3609 if (is_write) {
3610 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3611 phys_addr, buf, l);
3612 } else {
3613 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3614 MEMTXATTRS_UNSPECIFIED,
3615 buf, l, 0);
3617 len -= l;
3618 buf += l;
3619 addr += l;
3621 return 0;
3625 * Allows code that needs to deal with migration bitmaps etc to still be built
3626 * target independent.
3628 size_t qemu_target_page_bits(void)
3630 return TARGET_PAGE_BITS;
3633 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
    /* Resolved at compile time from the target configuration macro. */
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
3649 #ifndef CONFIG_USER_ONLY
3650 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3652 MemoryRegion*mr;
3653 hwaddr l = 1;
3654 bool res;
3656 rcu_read_lock();
3657 mr = address_space_translate(&address_space_memory,
3658 phys_addr, &phys_addr, &l, false);
3660 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3661 rcu_read_unlock();
3662 return res;
3665 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3667 RAMBlock *block;
3668 int ret = 0;
3670 rcu_read_lock();
3671 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3672 ret = func(block->idstr, block->host, block->offset,
3673 block->used_length, opaque);
3674 if (ret) {
3675 break;
3678 rcu_read_unlock();
3679 return ret;
3681 #endif