iotests: Move _filter_nbd into common.filter
[qemu.git] / exec.c
blob9e076bc323eace5ee85b4561f86042d73356489b
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #ifndef _WIN32
21 #include <sys/mman.h>
22 #endif
24 #include "qemu-common.h"
25 #include "cpu.h"
26 #include "tcg.h"
27 #include "hw/hw.h"
28 #if !defined(CONFIG_USER_ONLY)
29 #include "hw/boards.h"
30 #endif
31 #include "hw/qdev.h"
32 #include "sysemu/kvm.h"
33 #include "sysemu/sysemu.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
42 #include <qemu.h>
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
45 #include "trace.h"
46 #endif
47 #include "exec/cpu-all.h"
48 #include "qemu/rcu_queue.h"
49 #include "qemu/main-loop.h"
50 #include "translate-all.h"
51 #include "sysemu/replay.h"
53 #include "exec/memory-internal.h"
54 #include "exec/ram_addr.h"
56 #include "qemu/range.h"
57 #ifndef _WIN32
58 #include "qemu/mmap-alloc.h"
59 #endif
61 //#define DEBUG_SUBPAGE
63 #if !defined(CONFIG_USER_ONLY)
64 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
65 * are protected by the ramlist lock.
67 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
69 static MemoryRegion *system_memory;
70 static MemoryRegion *system_io;
72 AddressSpace address_space_io;
73 AddressSpace address_space_memory;
75 MemoryRegion io_mem_rom, io_mem_notdirty;
76 static MemoryRegion io_mem_unassigned;
78 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
79 #define RAM_PREALLOC (1 << 0)
81 /* RAM is mmap-ed with MAP_SHARED */
82 #define RAM_SHARED (1 << 1)
84 /* Only a portion of RAM (used_length) is actually used, and migrated.
85 * This used_length size can change across reboots.
87 #define RAM_RESIZEABLE (1 << 2)
89 #endif
91 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
92 /* current CPU in the current thread. It is only valid inside
93 cpu_exec() */
94 __thread CPUState *current_cpu;
95 /* 0 = Do not count executed instructions.
96 1 = Precise instruction counting.
97 2 = Adaptive rate instruction counting. */
98 int use_icount;
100 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the multi-level physical page map: two bit-fields, 32 bits. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
111 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
113 /* Size of the L2 (and L3, etc) page tables. */
114 #define ADDR_SPACE_BITS 64
116 #define P_L2_BITS 9
117 #define P_L2_SIZE (1 << P_L2_BITS)
119 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
121 typedef PhysPageEntry Node[P_L2_SIZE];
123 typedef struct PhysPageMap {
124 struct rcu_head rcu;
126 unsigned sections_nb;
127 unsigned sections_nb_alloc;
128 unsigned nodes_nb;
129 unsigned nodes_nb_alloc;
130 Node *nodes;
131 MemoryRegionSection *sections;
132 } PhysPageMap;
134 struct AddressSpaceDispatch {
135 struct rcu_head rcu;
137 /* This is a multi-level map on the physical address space.
138 * The bottom level has pointers to MemoryRegionSections.
140 PhysPageEntry phys_map;
141 PhysPageMap map;
142 AddressSpace *as;
145 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
146 typedef struct subpage_t {
147 MemoryRegion iomem;
148 AddressSpace *as;
149 hwaddr base;
150 uint16_t sub_section[TARGET_PAGE_SIZE];
151 } subpage_t;
153 #define PHYS_SECTION_UNASSIGNED 0
154 #define PHYS_SECTION_NOTDIRTY 1
155 #define PHYS_SECTION_ROM 2
156 #define PHYS_SECTION_WATCH 3
158 static void io_mem_init(void);
159 static void memory_map_init(void);
160 static void tcg_commit(MemoryListener *listener);
162 static MemoryRegion io_mem_watch;
165 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
166 * @cpu: the CPU whose AddressSpace this is
167 * @as: the AddressSpace itself
168 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
169 * @tcg_as_listener: listener for tracking changes to the AddressSpace
171 struct CPUAddressSpace {
172 CPUState *cpu;
173 AddressSpace *as;
174 struct AddressSpaceDispatch *memory_dispatch;
175 MemoryListener tcg_as_listener;
178 #endif
180 #if !defined(CONFIG_USER_ONLY)
182 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
184 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
185 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
186 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
187 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
191 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
193 unsigned i;
194 uint32_t ret;
195 PhysPageEntry e;
196 PhysPageEntry *p;
198 ret = map->nodes_nb++;
199 p = map->nodes[ret];
200 assert(ret != PHYS_MAP_NODE_NIL);
201 assert(ret != map->nodes_nb_alloc);
203 e.skip = leaf ? 0 : 1;
204 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
205 for (i = 0; i < P_L2_SIZE; ++i) {
206 memcpy(&p[i], &e, sizeof(e));
208 return ret;
211 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
212 hwaddr *index, hwaddr *nb, uint16_t leaf,
213 int level)
215 PhysPageEntry *p;
216 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
218 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
219 lp->ptr = phys_map_node_alloc(map, level == 0);
221 p = map->nodes[lp->ptr];
222 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
224 while (*nb && lp < &p[P_L2_SIZE]) {
225 if ((*index & (step - 1)) == 0 && *nb >= step) {
226 lp->skip = 0;
227 lp->ptr = leaf;
228 *index += step;
229 *nb -= step;
230 } else {
231 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
233 ++lp;
237 static void phys_page_set(AddressSpaceDispatch *d,
238 hwaddr index, hwaddr nb,
239 uint16_t leaf)
241 /* Wildly overreserve - it doesn't matter much. */
242 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
244 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
247 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
248 * and update our entry so we can skip it and go directly to the destination.
250 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
252 unsigned valid_ptr = P_L2_SIZE;
253 int valid = 0;
254 PhysPageEntry *p;
255 int i;
257 if (lp->ptr == PHYS_MAP_NODE_NIL) {
258 return;
261 p = nodes[lp->ptr];
262 for (i = 0; i < P_L2_SIZE; i++) {
263 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
264 continue;
267 valid_ptr = i;
268 valid++;
269 if (p[i].skip) {
270 phys_page_compact(&p[i], nodes, compacted);
274 /* We can only compress if there's only one child. */
275 if (valid != 1) {
276 return;
279 assert(valid_ptr < P_L2_SIZE);
281 /* Don't compress if it won't fit in the # of bits we have. */
282 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
283 return;
286 lp->ptr = p[valid_ptr].ptr;
287 if (!p[valid_ptr].skip) {
288 /* If our only child is a leaf, make this a leaf. */
289 /* By design, we should have made this node a leaf to begin with so we
290 * should never reach here.
291 * But since it's so simple to handle this, let's do it just in case we
292 * change this rule.
294 lp->skip = 0;
295 } else {
296 lp->skip += p[valid_ptr].skip;
300 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
302 DECLARE_BITMAP(compacted, nodes_nb);
304 if (d->phys_map.skip) {
305 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
309 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
310 Node *nodes, MemoryRegionSection *sections)
312 PhysPageEntry *p;
313 hwaddr index = addr >> TARGET_PAGE_BITS;
314 int i;
316 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
317 if (lp.ptr == PHYS_MAP_NODE_NIL) {
318 return &sections[PHYS_SECTION_UNASSIGNED];
320 p = nodes[lp.ptr];
321 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
324 if (sections[lp.ptr].size.hi ||
325 range_covers_byte(sections[lp.ptr].offset_within_address_space,
326 sections[lp.ptr].size.lo, addr)) {
327 return &sections[lp.ptr];
328 } else {
329 return &sections[PHYS_SECTION_UNASSIGNED];
333 bool memory_region_is_unassigned(MemoryRegion *mr)
335 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
336 && mr != &io_mem_watch;
339 /* Called from RCU critical section */
340 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
341 hwaddr addr,
342 bool resolve_subpage)
344 MemoryRegionSection *section;
345 subpage_t *subpage;
347 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
348 if (resolve_subpage && section->mr->subpage) {
349 subpage = container_of(section->mr, subpage_t, iomem);
350 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
352 return section;
355 /* Called from RCU critical section */
356 static MemoryRegionSection *
357 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
358 hwaddr *plen, bool resolve_subpage)
360 MemoryRegionSection *section;
361 MemoryRegion *mr;
362 Int128 diff;
364 section = address_space_lookup_region(d, addr, resolve_subpage);
365 /* Compute offset within MemoryRegionSection */
366 addr -= section->offset_within_address_space;
368 /* Compute offset within MemoryRegion */
369 *xlat = addr + section->offset_within_region;
371 mr = section->mr;
373 /* MMIO registers can be expected to perform full-width accesses based only
374 * on their address, without considering adjacent registers that could
375 * decode to completely different MemoryRegions. When such registers
376 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
377 * regions overlap wildly. For this reason we cannot clamp the accesses
378 * here.
380 * If the length is small (as is the case for address_space_ldl/stl),
381 * everything works fine. If the incoming length is large, however,
382 * the caller really has to do the clamping through memory_access_size.
384 if (memory_region_is_ram(mr)) {
385 diff = int128_sub(section->size, int128_make64(addr));
386 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
388 return section;
391 /* Called from RCU critical section */
392 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
393 hwaddr *xlat, hwaddr *plen,
394 bool is_write)
396 IOMMUTLBEntry iotlb;
397 MemoryRegionSection *section;
398 MemoryRegion *mr;
400 for (;;) {
401 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
402 section = address_space_translate_internal(d, addr, &addr, plen, true);
403 mr = section->mr;
405 if (!mr->iommu_ops) {
406 break;
409 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
410 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
411 | (addr & iotlb.addr_mask));
412 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
413 if (!(iotlb.perm & (1 << is_write))) {
414 mr = &io_mem_unassigned;
415 break;
418 as = iotlb.target_as;
421 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
422 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
423 *plen = MIN(page, *plen);
426 *xlat = addr;
427 return mr;
430 /* Called from RCU critical section */
431 MemoryRegionSection *
432 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
433 hwaddr *xlat, hwaddr *plen)
435 MemoryRegionSection *section;
436 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
438 section = address_space_translate_internal(d, addr, xlat, plen, false);
440 assert(!section->mr->iommu_ops);
441 return section;
443 #endif
445 #if !defined(CONFIG_USER_ONLY)
447 static int cpu_common_post_load(void *opaque, int version_id)
449 CPUState *cpu = opaque;
451 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
452 version_id is increased. */
453 cpu->interrupt_request &= ~0x01;
454 tlb_flush(cpu, 1);
456 return 0;
459 static int cpu_common_pre_load(void *opaque)
461 CPUState *cpu = opaque;
463 cpu->exception_index = -1;
465 return 0;
468 static bool cpu_common_exception_index_needed(void *opaque)
470 CPUState *cpu = opaque;
472 return tcg_enabled() && cpu->exception_index != -1;
475 static const VMStateDescription vmstate_cpu_common_exception_index = {
476 .name = "cpu_common/exception_index",
477 .version_id = 1,
478 .minimum_version_id = 1,
479 .needed = cpu_common_exception_index_needed,
480 .fields = (VMStateField[]) {
481 VMSTATE_INT32(exception_index, CPUState),
482 VMSTATE_END_OF_LIST()
486 static bool cpu_common_crash_occurred_needed(void *opaque)
488 CPUState *cpu = opaque;
490 return cpu->crash_occurred;
493 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
494 .name = "cpu_common/crash_occurred",
495 .version_id = 1,
496 .minimum_version_id = 1,
497 .needed = cpu_common_crash_occurred_needed,
498 .fields = (VMStateField[]) {
499 VMSTATE_BOOL(crash_occurred, CPUState),
500 VMSTATE_END_OF_LIST()
504 const VMStateDescription vmstate_cpu_common = {
505 .name = "cpu_common",
506 .version_id = 1,
507 .minimum_version_id = 1,
508 .pre_load = cpu_common_pre_load,
509 .post_load = cpu_common_post_load,
510 .fields = (VMStateField[]) {
511 VMSTATE_UINT32(halted, CPUState),
512 VMSTATE_UINT32(interrupt_request, CPUState),
513 VMSTATE_END_OF_LIST()
515 .subsections = (const VMStateDescription*[]) {
516 &vmstate_cpu_common_exception_index,
517 &vmstate_cpu_common_crash_occurred,
518 NULL
522 #endif
524 CPUState *qemu_get_cpu(int index)
526 CPUState *cpu;
528 CPU_FOREACH(cpu) {
529 if (cpu->cpu_index == index) {
530 return cpu;
534 return NULL;
537 #if !defined(CONFIG_USER_ONLY)
538 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
540 CPUAddressSpace *newas;
542 /* Target code should have set num_ases before calling us */
543 assert(asidx < cpu->num_ases);
545 if (asidx == 0) {
546 /* address space 0 gets the convenience alias */
547 cpu->as = as;
550 /* KVM cannot currently support multiple address spaces. */
551 assert(asidx == 0 || !kvm_enabled());
553 if (!cpu->cpu_ases) {
554 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
557 newas = &cpu->cpu_ases[asidx];
558 newas->cpu = cpu;
559 newas->as = as;
560 if (tcg_enabled()) {
561 newas->tcg_as_listener.commit = tcg_commit;
562 memory_listener_register(&newas->tcg_as_listener, as);
566 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
568 /* Return the AddressSpace corresponding to the specified index */
569 return cpu->cpu_ases[asidx].as;
571 #endif
573 #ifndef CONFIG_USER_ONLY
574 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
576 static int cpu_get_free_index(Error **errp)
578 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
580 if (cpu >= MAX_CPUMASK_BITS) {
581 error_setg(errp, "Trying to use more CPUs than max of %d",
582 MAX_CPUMASK_BITS);
583 return -1;
586 bitmap_set(cpu_index_map, cpu, 1);
587 return cpu;
590 void cpu_exec_exit(CPUState *cpu)
592 if (cpu->cpu_index == -1) {
593 /* cpu_index was never allocated by this @cpu or was already freed. */
594 return;
597 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
598 cpu->cpu_index = -1;
600 #else
602 static int cpu_get_free_index(Error **errp)
604 CPUState *some_cpu;
605 int cpu_index = 0;
607 CPU_FOREACH(some_cpu) {
608 cpu_index++;
610 return cpu_index;
613 void cpu_exec_exit(CPUState *cpu)
616 #endif
618 void cpu_exec_init(CPUState *cpu, Error **errp)
620 CPUClass *cc = CPU_GET_CLASS(cpu);
621 int cpu_index;
622 Error *local_err = NULL;
624 cpu->as = NULL;
625 cpu->num_ases = 0;
627 #ifndef CONFIG_USER_ONLY
628 cpu->thread_id = qemu_get_thread_id();
630 /* This is a softmmu CPU object, so create a property for it
631 * so users can wire up its memory. (This can't go in qom/cpu.c
632 * because that file is compiled only once for both user-mode
633 * and system builds.) The default if no link is set up is to use
634 * the system address space.
636 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
637 (Object **)&cpu->memory,
638 qdev_prop_allow_set_link_before_realize,
639 OBJ_PROP_LINK_UNREF_ON_RELEASE,
640 &error_abort);
641 cpu->memory = system_memory;
642 object_ref(OBJECT(cpu->memory));
643 #endif
645 #if defined(CONFIG_USER_ONLY)
646 cpu_list_lock();
647 #endif
648 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
649 if (local_err) {
650 error_propagate(errp, local_err);
651 #if defined(CONFIG_USER_ONLY)
652 cpu_list_unlock();
653 #endif
654 return;
656 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
657 #if defined(CONFIG_USER_ONLY)
658 cpu_list_unlock();
659 #endif
660 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
661 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
663 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
664 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
665 cpu_save, cpu_load, cpu->env_ptr);
666 assert(cc->vmsd == NULL);
667 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
668 #endif
669 if (cc->vmsd != NULL) {
670 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
674 #if defined(CONFIG_USER_ONLY)
675 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
677 tb_invalidate_phys_page_range(pc, pc + 1, 0);
679 #else
680 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
682 MemTxAttrs attrs;
683 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
684 int asidx = cpu_asidx_from_attrs(cpu, attrs);
685 if (phys != -1) {
686 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
687 phys | (pc & ~TARGET_PAGE_MASK));
690 #endif
692 #if defined(CONFIG_USER_ONLY)
693 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
698 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
699 int flags)
701 return -ENOSYS;
704 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
708 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
709 int flags, CPUWatchpoint **watchpoint)
711 return -ENOSYS;
713 #else
714 /* Add a watchpoint. */
715 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
716 int flags, CPUWatchpoint **watchpoint)
718 CPUWatchpoint *wp;
720 /* forbid ranges which are empty or run off the end of the address space */
721 if (len == 0 || (addr + len - 1) < addr) {
722 error_report("tried to set invalid watchpoint at %"
723 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
724 return -EINVAL;
726 wp = g_malloc(sizeof(*wp));
728 wp->vaddr = addr;
729 wp->len = len;
730 wp->flags = flags;
732 /* keep all GDB-injected watchpoints in front */
733 if (flags & BP_GDB) {
734 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
735 } else {
736 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
739 tlb_flush_page(cpu, addr);
741 if (watchpoint)
742 *watchpoint = wp;
743 return 0;
746 /* Remove a specific watchpoint. */
747 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
748 int flags)
750 CPUWatchpoint *wp;
752 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
753 if (addr == wp->vaddr && len == wp->len
754 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
755 cpu_watchpoint_remove_by_ref(cpu, wp);
756 return 0;
759 return -ENOENT;
762 /* Remove a specific watchpoint by reference. */
763 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
765 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
767 tlb_flush_page(cpu, watchpoint->vaddr);
769 g_free(watchpoint);
772 /* Remove all matching watchpoints. */
773 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
775 CPUWatchpoint *wp, *next;
777 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
778 if (wp->flags & mask) {
779 cpu_watchpoint_remove_by_ref(cpu, wp);
784 /* Return true if this watchpoint address matches the specified
785 * access (ie the address range covered by the watchpoint overlaps
786 * partially or completely with the address range covered by the
787 * access).
789 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
790 vaddr addr,
791 vaddr len)
793 /* We know the lengths are non-zero, but a little caution is
794 * required to avoid errors in the case where the range ends
795 * exactly at the top of the address space and so addr + len
796 * wraps round to zero.
798 vaddr wpend = wp->vaddr + wp->len - 1;
799 vaddr addrend = addr + len - 1;
801 return !(addr > wpend || wp->vaddr > addrend);
804 #endif
806 /* Add a breakpoint. */
807 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
808 CPUBreakpoint **breakpoint)
810 CPUBreakpoint *bp;
812 bp = g_malloc(sizeof(*bp));
814 bp->pc = pc;
815 bp->flags = flags;
817 /* keep all GDB-injected breakpoints in front */
818 if (flags & BP_GDB) {
819 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
820 } else {
821 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
824 breakpoint_invalidate(cpu, pc);
826 if (breakpoint) {
827 *breakpoint = bp;
829 return 0;
832 /* Remove a specific breakpoint. */
833 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
835 CPUBreakpoint *bp;
837 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
838 if (bp->pc == pc && bp->flags == flags) {
839 cpu_breakpoint_remove_by_ref(cpu, bp);
840 return 0;
843 return -ENOENT;
846 /* Remove a specific breakpoint by reference. */
847 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
849 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
851 breakpoint_invalidate(cpu, breakpoint->pc);
853 g_free(breakpoint);
856 /* Remove all matching breakpoints. */
857 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
859 CPUBreakpoint *bp, *next;
861 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
862 if (bp->flags & mask) {
863 cpu_breakpoint_remove_by_ref(cpu, bp);
868 /* enable or disable single step mode. EXCP_DEBUG is returned by the
869 CPU loop after each instruction */
870 void cpu_single_step(CPUState *cpu, int enabled)
872 if (cpu->singlestep_enabled != enabled) {
873 cpu->singlestep_enabled = enabled;
874 if (kvm_enabled()) {
875 kvm_update_guest_debug(cpu, 0);
876 } else {
877 /* must flush all the translated code to avoid inconsistencies */
878 /* XXX: only flush what is necessary */
879 tb_flush(cpu);
884 void cpu_abort(CPUState *cpu, const char *fmt, ...)
886 va_list ap;
887 va_list ap2;
889 va_start(ap, fmt);
890 va_copy(ap2, ap);
891 fprintf(stderr, "qemu: fatal: ");
892 vfprintf(stderr, fmt, ap);
893 fprintf(stderr, "\n");
894 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
895 if (qemu_log_separate()) {
896 qemu_log("qemu: fatal: ");
897 qemu_log_vprintf(fmt, ap2);
898 qemu_log("\n");
899 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
900 qemu_log_flush();
901 qemu_log_close();
903 va_end(ap2);
904 va_end(ap);
905 replay_finish();
906 #if defined(CONFIG_USER_ONLY)
908 struct sigaction act;
909 sigfillset(&act.sa_mask);
910 act.sa_handler = SIG_DFL;
911 sigaction(SIGABRT, &act, NULL);
913 #endif
914 abort();
917 #if !defined(CONFIG_USER_ONLY)
918 /* Called from RCU critical section */
919 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
921 RAMBlock *block;
923 block = atomic_rcu_read(&ram_list.mru_block);
924 if (block && addr - block->offset < block->max_length) {
925 return block;
927 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
928 if (addr - block->offset < block->max_length) {
929 goto found;
933 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
934 abort();
936 found:
937 /* It is safe to write mru_block outside the iothread lock. This
938 * is what happens:
940 * mru_block = xxx
941 * rcu_read_unlock()
942 * xxx removed from list
943 * rcu_read_lock()
944 * read mru_block
945 * mru_block = NULL;
946 * call_rcu(reclaim_ramblock, xxx);
947 * rcu_read_unlock()
949 * atomic_rcu_set is not needed here. The block was already published
950 * when it was placed into the list. Here we're just making an extra
951 * copy of the pointer.
953 ram_list.mru_block = block;
954 return block;
957 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
959 CPUState *cpu;
960 ram_addr_t start1;
961 RAMBlock *block;
962 ram_addr_t end;
964 end = TARGET_PAGE_ALIGN(start + length);
965 start &= TARGET_PAGE_MASK;
967 rcu_read_lock();
968 block = qemu_get_ram_block(start);
969 assert(block == qemu_get_ram_block(end - 1));
970 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
971 CPU_FOREACH(cpu) {
972 tlb_reset_dirty(cpu, start1, length);
974 rcu_read_unlock();
977 /* Note: start and end must be within the same ram block. */
978 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
979 ram_addr_t length,
980 unsigned client)
982 unsigned long end, page;
983 bool dirty;
985 if (length == 0) {
986 return false;
989 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
990 page = start >> TARGET_PAGE_BITS;
991 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
992 page, end - page);
994 if (dirty && tcg_enabled()) {
995 tlb_reset_dirty_range_all(start, length);
998 return dirty;
1001 /* Called from RCU critical section */
1002 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1003 MemoryRegionSection *section,
1004 target_ulong vaddr,
1005 hwaddr paddr, hwaddr xlat,
1006 int prot,
1007 target_ulong *address)
1009 hwaddr iotlb;
1010 CPUWatchpoint *wp;
1012 if (memory_region_is_ram(section->mr)) {
1013 /* Normal RAM. */
1014 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1015 + xlat;
1016 if (!section->readonly) {
1017 iotlb |= PHYS_SECTION_NOTDIRTY;
1018 } else {
1019 iotlb |= PHYS_SECTION_ROM;
1021 } else {
1022 AddressSpaceDispatch *d;
1024 d = atomic_rcu_read(&section->address_space->dispatch);
1025 iotlb = section - d->map.sections;
1026 iotlb += xlat;
1029 /* Make accesses to pages with watchpoints go via the
1030 watchpoint trap routines. */
1031 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1032 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1033 /* Avoid trapping reads of pages with a write breakpoint. */
1034 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1035 iotlb = PHYS_SECTION_WATCH + paddr;
1036 *address |= TLB_MMIO;
1037 break;
1042 return iotlb;
1044 #endif /* !defined(CONFIG_USER_ONLY) */
1046 #if !defined(CONFIG_USER_ONLY)
1048 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1049 uint16_t section);
1050 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1052 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1053 qemu_anon_ram_alloc;
1056 * Set a custom physical guest memory alloator.
1057 * Accelerators with unusual needs may need this. Hopefully, we can
1058 * get rid of it eventually.
1060 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1062 phys_mem_alloc = alloc;
1065 static uint16_t phys_section_add(PhysPageMap *map,
1066 MemoryRegionSection *section)
1068 /* The physical section number is ORed with a page-aligned
1069 * pointer to produce the iotlb entries. Thus it should
1070 * never overflow into the page-aligned value.
1072 assert(map->sections_nb < TARGET_PAGE_SIZE);
1074 if (map->sections_nb == map->sections_nb_alloc) {
1075 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1076 map->sections = g_renew(MemoryRegionSection, map->sections,
1077 map->sections_nb_alloc);
1079 map->sections[map->sections_nb] = *section;
1080 memory_region_ref(section->mr);
1081 return map->sections_nb++;
1084 static void phys_section_destroy(MemoryRegion *mr)
1086 bool have_sub_page = mr->subpage;
1088 memory_region_unref(mr);
1090 if (have_sub_page) {
1091 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1092 object_unref(OBJECT(&subpage->iomem));
1093 g_free(subpage);
1097 static void phys_sections_free(PhysPageMap *map)
1099 while (map->sections_nb > 0) {
1100 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1101 phys_section_destroy(section->mr);
1103 g_free(map->sections);
1104 g_free(map->nodes);
1107 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1109 subpage_t *subpage;
1110 hwaddr base = section->offset_within_address_space
1111 & TARGET_PAGE_MASK;
1112 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1113 d->map.nodes, d->map.sections);
1114 MemoryRegionSection subsection = {
1115 .offset_within_address_space = base,
1116 .size = int128_make64(TARGET_PAGE_SIZE),
1118 hwaddr start, end;
1120 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1122 if (!(existing->mr->subpage)) {
1123 subpage = subpage_init(d->as, base);
1124 subsection.address_space = d->as;
1125 subsection.mr = &subpage->iomem;
1126 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1127 phys_section_add(&d->map, &subsection));
1128 } else {
1129 subpage = container_of(existing->mr, subpage_t, iomem);
1131 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1132 end = start + int128_get64(section->size) - 1;
1133 subpage_register(subpage, start, end,
1134 phys_section_add(&d->map, section));
1138 static void register_multipage(AddressSpaceDispatch *d,
1139 MemoryRegionSection *section)
1141 hwaddr start_addr = section->offset_within_address_space;
1142 uint16_t section_index = phys_section_add(&d->map, section);
1143 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1144 TARGET_PAGE_BITS));
1146 assert(num_pages);
1147 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1150 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1152 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1153 AddressSpaceDispatch *d = as->next_dispatch;
1154 MemoryRegionSection now = *section, remain = *section;
1155 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1157 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1158 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1159 - now.offset_within_address_space;
1161 now.size = int128_min(int128_make64(left), now.size);
1162 register_subpage(d, &now);
1163 } else {
1164 now.size = int128_zero();
1166 while (int128_ne(remain.size, now.size)) {
1167 remain.size = int128_sub(remain.size, now.size);
1168 remain.offset_within_address_space += int128_get64(now.size);
1169 remain.offset_within_region += int128_get64(now.size);
1170 now = remain;
1171 if (int128_lt(remain.size, page_size)) {
1172 register_subpage(d, &now);
1173 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1174 now.size = page_size;
1175 register_subpage(d, &now);
1176 } else {
1177 now.size = int128_and(now.size, int128_neg(page_size));
1178 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1189 void qemu_mutex_lock_ramlist(void)
1191 qemu_mutex_lock(&ram_list.mutex);
1194 void qemu_mutex_unlock_ramlist(void)
1196 qemu_mutex_unlock(&ram_list.mutex);
1199 #ifdef __linux__
1201 #include <sys/vfs.h>
1203 #define HUGETLBFS_MAGIC 0x958458f6
1205 static long gethugepagesize(const char *path, Error **errp)
1207 struct statfs fs;
1208 int ret;
1210 do {
1211 ret = statfs(path, &fs);
1212 } while (ret != 0 && errno == EINTR);
1214 if (ret != 0) {
1215 error_setg_errno(errp, errno, "failed to get page size of file %s",
1216 path);
1217 return 0;
1220 return fs.f_bsize;
1223 static void *file_ram_alloc(RAMBlock *block,
1224 ram_addr_t memory,
1225 const char *path,
1226 Error **errp)
1228 struct stat st;
1229 char *filename;
1230 char *sanitized_name;
1231 char *c;
1232 void *area;
1233 int fd;
1234 uint64_t hpagesize;
1235 Error *local_err = NULL;
1237 hpagesize = gethugepagesize(path, &local_err);
1238 if (local_err) {
1239 error_propagate(errp, local_err);
1240 goto error;
1242 block->mr->align = hpagesize;
1244 if (memory < hpagesize) {
1245 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1246 "or larger than huge page size 0x%" PRIx64,
1247 memory, hpagesize);
1248 goto error;
1251 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1252 error_setg(errp,
1253 "host lacks kvm mmu notifiers, -mem-path unsupported");
1254 goto error;
1257 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1258 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1259 sanitized_name = g_strdup(memory_region_name(block->mr));
1260 for (c = sanitized_name; *c != '\0'; c++) {
1261 if (*c == '/') {
1262 *c = '_';
1266 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1267 sanitized_name);
1268 g_free(sanitized_name);
1270 fd = mkstemp(filename);
1271 if (fd >= 0) {
1272 unlink(filename);
1274 g_free(filename);
1275 } else {
1276 fd = open(path, O_RDWR | O_CREAT, 0644);
1279 if (fd < 0) {
1280 error_setg_errno(errp, errno,
1281 "unable to create backing store for hugepages");
1282 goto error;
1285 memory = ROUND_UP(memory, hpagesize);
1288 * ftruncate is not supported by hugetlbfs in older
1289 * hosts, so don't bother bailing out on errors.
1290 * If anything goes wrong with it under other filesystems,
1291 * mmap will fail.
1293 if (ftruncate(fd, memory)) {
1294 perror("ftruncate");
1297 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1298 if (area == MAP_FAILED) {
1299 error_setg_errno(errp, errno,
1300 "unable to map backing store for hugepages");
1301 close(fd);
1302 goto error;
1305 if (mem_prealloc) {
1306 os_mem_prealloc(fd, area, memory);
1309 block->fd = fd;
1310 return area;
1312 error:
1313 return NULL;
1315 #endif
1317 /* Called with the ramlist lock held. */
1318 static ram_addr_t find_ram_offset(ram_addr_t size)
1320 RAMBlock *block, *next_block;
1321 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1323 assert(size != 0); /* it would hand out same offset multiple times */
1325 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1326 return 0;
1329 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1330 ram_addr_t end, next = RAM_ADDR_MAX;
1332 end = block->offset + block->max_length;
1334 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1335 if (next_block->offset >= end) {
1336 next = MIN(next, next_block->offset);
1339 if (next - end >= size && next - end < mingap) {
1340 offset = end;
1341 mingap = next - end;
1345 if (offset == RAM_ADDR_MAX) {
1346 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1347 (uint64_t)size);
1348 abort();
1351 return offset;
1354 ram_addr_t last_ram_offset(void)
1356 RAMBlock *block;
1357 ram_addr_t last = 0;
1359 rcu_read_lock();
1360 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1361 last = MAX(last, block->offset + block->max_length);
1363 rcu_read_unlock();
1364 return last;
1367 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1369 int ret;
1371 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1372 if (!machine_dump_guest_core(current_machine)) {
1373 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1374 if (ret) {
1375 perror("qemu_madvise");
1376 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1377 "but dump_guest_core=off specified\n");
1382 /* Called within an RCU critical section, or while the ramlist lock
1383 * is held.
1385 static RAMBlock *find_ram_block(ram_addr_t addr)
1387 RAMBlock *block;
1389 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1390 if (block->offset == addr) {
1391 return block;
1395 return NULL;
1398 const char *qemu_ram_get_idstr(RAMBlock *rb)
1400 return rb->idstr;
1403 /* Called with iothread lock held. */
1404 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1406 RAMBlock *new_block, *block;
1408 rcu_read_lock();
1409 new_block = find_ram_block(addr);
1410 assert(new_block);
1411 assert(!new_block->idstr[0]);
1413 if (dev) {
1414 char *id = qdev_get_dev_path(dev);
1415 if (id) {
1416 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1417 g_free(id);
1420 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1422 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1423 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1424 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1425 new_block->idstr);
1426 abort();
1429 rcu_read_unlock();
1432 /* Called with iothread lock held. */
1433 void qemu_ram_unset_idstr(ram_addr_t addr)
1435 RAMBlock *block;
1437 /* FIXME: arch_init.c assumes that this is not called throughout
1438 * migration. Ignore the problem since hot-unplug during migration
1439 * does not work anyway.
1442 rcu_read_lock();
1443 block = find_ram_block(addr);
1444 if (block) {
1445 memset(block->idstr, 0, sizeof(block->idstr));
1447 rcu_read_unlock();
1450 static int memory_try_enable_merging(void *addr, size_t len)
1452 if (!machine_mem_merge(current_machine)) {
1453 /* disabled by the user */
1454 return 0;
1457 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1460 /* Only legal before guest might have detected the memory size: e.g. on
1461 * incoming migration, or right after reset.
1463 * As memory core doesn't know how is memory accessed, it is up to
1464 * resize callback to update device state and/or add assertions to detect
1465 * misuse, if necessary.
1467 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1469 RAMBlock *block = find_ram_block(base);
1471 assert(block);
1473 newsize = HOST_PAGE_ALIGN(newsize);
1475 if (block->used_length == newsize) {
1476 return 0;
1479 if (!(block->flags & RAM_RESIZEABLE)) {
1480 error_setg_errno(errp, EINVAL,
1481 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1482 " in != 0x" RAM_ADDR_FMT, block->idstr,
1483 newsize, block->used_length);
1484 return -EINVAL;
1487 if (block->max_length < newsize) {
1488 error_setg_errno(errp, EINVAL,
1489 "Length too large: %s: 0x" RAM_ADDR_FMT
1490 " > 0x" RAM_ADDR_FMT, block->idstr,
1491 newsize, block->max_length);
1492 return -EINVAL;
1495 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1496 block->used_length = newsize;
1497 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1498 DIRTY_CLIENTS_ALL);
1499 memory_region_set_size(block->mr, newsize);
1500 if (block->resized) {
1501 block->resized(block->idstr, newsize, block->host);
1503 return 0;
1506 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1508 RAMBlock *block;
1509 RAMBlock *last_block = NULL;
1510 ram_addr_t old_ram_size, new_ram_size;
1511 Error *err = NULL;
1513 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1515 qemu_mutex_lock_ramlist();
1516 new_block->offset = find_ram_offset(new_block->max_length);
1518 if (!new_block->host) {
1519 if (xen_enabled()) {
1520 xen_ram_alloc(new_block->offset, new_block->max_length,
1521 new_block->mr, &err);
1522 if (err) {
1523 error_propagate(errp, err);
1524 qemu_mutex_unlock_ramlist();
1525 return -1;
1527 } else {
1528 new_block->host = phys_mem_alloc(new_block->max_length,
1529 &new_block->mr->align);
1530 if (!new_block->host) {
1531 error_setg_errno(errp, errno,
1532 "cannot set up guest memory '%s'",
1533 memory_region_name(new_block->mr));
1534 qemu_mutex_unlock_ramlist();
1535 return -1;
1537 memory_try_enable_merging(new_block->host, new_block->max_length);
1541 new_ram_size = MAX(old_ram_size,
1542 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1543 if (new_ram_size > old_ram_size) {
1544 migration_bitmap_extend(old_ram_size, new_ram_size);
1546 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1547 * QLIST (which has an RCU-friendly variant) does not have insertion at
1548 * tail, so save the last element in last_block.
1550 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1551 last_block = block;
1552 if (block->max_length < new_block->max_length) {
1553 break;
1556 if (block) {
1557 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1558 } else if (last_block) {
1559 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1560 } else { /* list is empty */
1561 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1563 ram_list.mru_block = NULL;
1565 /* Write list before version */
1566 smp_wmb();
1567 ram_list.version++;
1568 qemu_mutex_unlock_ramlist();
1570 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1572 if (new_ram_size > old_ram_size) {
1573 int i;
1575 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1576 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1577 ram_list.dirty_memory[i] =
1578 bitmap_zero_extend(ram_list.dirty_memory[i],
1579 old_ram_size, new_ram_size);
1582 cpu_physical_memory_set_dirty_range(new_block->offset,
1583 new_block->used_length,
1584 DIRTY_CLIENTS_ALL);
1586 if (new_block->host) {
1587 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1588 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1589 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1590 if (kvm_enabled()) {
1591 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1595 return new_block->offset;
1598 #ifdef __linux__
1599 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1600 bool share, const char *mem_path,
1601 Error **errp)
1603 RAMBlock *new_block;
1604 ram_addr_t addr;
1605 Error *local_err = NULL;
1607 if (xen_enabled()) {
1608 error_setg(errp, "-mem-path not supported with Xen");
1609 return -1;
1612 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1614 * file_ram_alloc() needs to allocate just like
1615 * phys_mem_alloc, but we haven't bothered to provide
1616 * a hook there.
1618 error_setg(errp,
1619 "-mem-path not supported with this accelerator");
1620 return -1;
1623 size = HOST_PAGE_ALIGN(size);
1624 new_block = g_malloc0(sizeof(*new_block));
1625 new_block->mr = mr;
1626 new_block->used_length = size;
1627 new_block->max_length = size;
1628 new_block->flags = share ? RAM_SHARED : 0;
1629 new_block->host = file_ram_alloc(new_block, size,
1630 mem_path, errp);
1631 if (!new_block->host) {
1632 g_free(new_block);
1633 return -1;
1636 addr = ram_block_add(new_block, &local_err);
1637 if (local_err) {
1638 g_free(new_block);
1639 error_propagate(errp, local_err);
1640 return -1;
1642 return addr;
1644 #endif
1646 static
1647 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1648 void (*resized)(const char*,
1649 uint64_t length,
1650 void *host),
1651 void *host, bool resizeable,
1652 MemoryRegion *mr, Error **errp)
1654 RAMBlock *new_block;
1655 ram_addr_t addr;
1656 Error *local_err = NULL;
1658 size = HOST_PAGE_ALIGN(size);
1659 max_size = HOST_PAGE_ALIGN(max_size);
1660 new_block = g_malloc0(sizeof(*new_block));
1661 new_block->mr = mr;
1662 new_block->resized = resized;
1663 new_block->used_length = size;
1664 new_block->max_length = max_size;
1665 assert(max_size >= size);
1666 new_block->fd = -1;
1667 new_block->host = host;
1668 if (host) {
1669 new_block->flags |= RAM_PREALLOC;
1671 if (resizeable) {
1672 new_block->flags |= RAM_RESIZEABLE;
1674 addr = ram_block_add(new_block, &local_err);
1675 if (local_err) {
1676 g_free(new_block);
1677 error_propagate(errp, local_err);
1678 return -1;
1680 return addr;
1683 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1684 MemoryRegion *mr, Error **errp)
1686 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1689 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1691 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1694 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1695 void (*resized)(const char*,
1696 uint64_t length,
1697 void *host),
1698 MemoryRegion *mr, Error **errp)
1700 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1703 static void reclaim_ramblock(RAMBlock *block)
1705 if (block->flags & RAM_PREALLOC) {
1707 } else if (xen_enabled()) {
1708 xen_invalidate_map_cache_entry(block->host);
1709 #ifndef _WIN32
1710 } else if (block->fd >= 0) {
1711 qemu_ram_munmap(block->host, block->max_length);
1712 close(block->fd);
1713 #endif
1714 } else {
1715 qemu_anon_ram_free(block->host, block->max_length);
1717 g_free(block);
1720 void qemu_ram_free(ram_addr_t addr)
1722 RAMBlock *block;
1724 qemu_mutex_lock_ramlist();
1725 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1726 if (addr == block->offset) {
1727 QLIST_REMOVE_RCU(block, next);
1728 ram_list.mru_block = NULL;
1729 /* Write list before version */
1730 smp_wmb();
1731 ram_list.version++;
1732 call_rcu(block, reclaim_ramblock, rcu);
1733 break;
1736 qemu_mutex_unlock_ramlist();
1739 #ifndef _WIN32
1740 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1742 RAMBlock *block;
1743 ram_addr_t offset;
1744 int flags;
1745 void *area, *vaddr;
1747 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1748 offset = addr - block->offset;
1749 if (offset < block->max_length) {
1750 vaddr = ramblock_ptr(block, offset);
1751 if (block->flags & RAM_PREALLOC) {
1753 } else if (xen_enabled()) {
1754 abort();
1755 } else {
1756 flags = MAP_FIXED;
1757 if (block->fd >= 0) {
1758 flags |= (block->flags & RAM_SHARED ?
1759 MAP_SHARED : MAP_PRIVATE);
1760 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1761 flags, block->fd, offset);
1762 } else {
1764 * Remap needs to match alloc. Accelerators that
1765 * set phys_mem_alloc never remap. If they did,
1766 * we'd need a remap hook here.
1768 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1770 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1771 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1772 flags, -1, 0);
1774 if (area != vaddr) {
1775 fprintf(stderr, "Could not remap addr: "
1776 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1777 length, addr);
1778 exit(1);
1780 memory_try_enable_merging(vaddr, length);
1781 qemu_ram_setup_dump(vaddr, length);
1786 #endif /* !_WIN32 */
1788 int qemu_get_ram_fd(ram_addr_t addr)
1790 RAMBlock *block;
1791 int fd;
1793 rcu_read_lock();
1794 block = qemu_get_ram_block(addr);
1795 fd = block->fd;
1796 rcu_read_unlock();
1797 return fd;
1800 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1802 RAMBlock *block;
1804 rcu_read_lock();
1805 block = qemu_get_ram_block(addr);
1806 block->fd = fd;
1807 rcu_read_unlock();
1810 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1812 RAMBlock *block;
1813 void *ptr;
1815 rcu_read_lock();
1816 block = qemu_get_ram_block(addr);
1817 ptr = ramblock_ptr(block, 0);
1818 rcu_read_unlock();
1819 return ptr;
1822 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1823 * This should not be used for general purpose DMA. Use address_space_map
1824 * or address_space_rw instead. For local memory (e.g. video ram) that the
1825 * device owns, use memory_region_get_ram_ptr.
1827 * Called within RCU critical section.
1829 void *qemu_get_ram_ptr(ram_addr_t addr)
1831 RAMBlock *block = qemu_get_ram_block(addr);
1833 if (xen_enabled() && block->host == NULL) {
1834 /* We need to check if the requested address is in the RAM
1835 * because we don't want to map the entire memory in QEMU.
1836 * In that case just map until the end of the page.
1838 if (block->offset == 0) {
1839 return xen_map_cache(addr, 0, 0);
1842 block->host = xen_map_cache(block->offset, block->max_length, 1);
1844 return ramblock_ptr(block, addr - block->offset);
1847 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1848 * but takes a size argument.
1850 * Called within RCU critical section.
1852 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1854 RAMBlock *block;
1855 ram_addr_t offset_inside_block;
1856 if (*size == 0) {
1857 return NULL;
1860 block = qemu_get_ram_block(addr);
1861 offset_inside_block = addr - block->offset;
1862 *size = MIN(*size, block->max_length - offset_inside_block);
1864 if (xen_enabled() && block->host == NULL) {
1865 /* We need to check if the requested address is in the RAM
1866 * because we don't want to map the entire memory in QEMU.
1867 * In that case just map the requested area.
1869 if (block->offset == 0) {
1870 return xen_map_cache(addr, *size, 1);
1873 block->host = xen_map_cache(block->offset, block->max_length, 1);
1876 return ramblock_ptr(block, offset_inside_block);
1880 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1881 * in that RAMBlock.
1883 * ptr: Host pointer to look up
1884 * round_offset: If true round the result offset down to a page boundary
1885 * *ram_addr: set to result ram_addr
1886 * *offset: set to result offset within the RAMBlock
1888 * Returns: RAMBlock (or NULL if not found)
1890 * By the time this function returns, the returned pointer is not protected
1891 * by RCU anymore. If the caller is not within an RCU critical section and
1892 * does not hold the iothread lock, it must have other means of protecting the
1893 * pointer, such as a reference to the region that includes the incoming
1894 * ram_addr_t.
1896 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1897 ram_addr_t *ram_addr,
1898 ram_addr_t *offset)
1900 RAMBlock *block;
1901 uint8_t *host = ptr;
1903 if (xen_enabled()) {
1904 rcu_read_lock();
1905 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1906 block = qemu_get_ram_block(*ram_addr);
1907 if (block) {
1908 *offset = (host - block->host);
1910 rcu_read_unlock();
1911 return block;
1914 rcu_read_lock();
1915 block = atomic_rcu_read(&ram_list.mru_block);
1916 if (block && block->host && host - block->host < block->max_length) {
1917 goto found;
1920 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1921 /* This case append when the block is not mapped. */
1922 if (block->host == NULL) {
1923 continue;
1925 if (host - block->host < block->max_length) {
1926 goto found;
1930 rcu_read_unlock();
1931 return NULL;
1933 found:
1934 *offset = (host - block->host);
1935 if (round_offset) {
1936 *offset &= TARGET_PAGE_MASK;
1938 *ram_addr = block->offset + *offset;
1939 rcu_read_unlock();
1940 return block;
1944 * Finds the named RAMBlock
1946 * name: The name of RAMBlock to find
1948 * Returns: RAMBlock (or NULL if not found)
1950 RAMBlock *qemu_ram_block_by_name(const char *name)
1952 RAMBlock *block;
1954 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1955 if (!strcmp(name, block->idstr)) {
1956 return block;
1960 return NULL;
1963 /* Some of the softmmu routines need to translate from a host pointer
1964 (typically a TLB entry) back to a ram offset. */
1965 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1967 RAMBlock *block;
1968 ram_addr_t offset; /* Not used */
1970 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
1972 if (!block) {
1973 return NULL;
1976 return block->mr;
1979 /* Called within RCU critical section. */
1980 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1981 uint64_t val, unsigned size)
1983 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1984 tb_invalidate_phys_page_fast(ram_addr, size);
1986 switch (size) {
1987 case 1:
1988 stb_p(qemu_get_ram_ptr(ram_addr), val);
1989 break;
1990 case 2:
1991 stw_p(qemu_get_ram_ptr(ram_addr), val);
1992 break;
1993 case 4:
1994 stl_p(qemu_get_ram_ptr(ram_addr), val);
1995 break;
1996 default:
1997 abort();
1999 /* Set both VGA and migration bits for simplicity and to remove
2000 * the notdirty callback faster.
2002 cpu_physical_memory_set_dirty_range(ram_addr, size,
2003 DIRTY_CLIENTS_NOCODE);
2004 /* we remove the notdirty callback only if the code has been
2005 flushed */
2006 if (!cpu_physical_memory_is_clean(ram_addr)) {
2007 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2011 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2012 unsigned size, bool is_write)
2014 return is_write;
2017 static const MemoryRegionOps notdirty_mem_ops = {
2018 .write = notdirty_mem_write,
2019 .valid.accepts = notdirty_mem_accepts,
2020 .endianness = DEVICE_NATIVE_ENDIAN,
2023 /* Generate a debug exception if a watchpoint has been hit. */
2024 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2026 CPUState *cpu = current_cpu;
2027 CPUArchState *env = cpu->env_ptr;
2028 target_ulong pc, cs_base;
2029 target_ulong vaddr;
2030 CPUWatchpoint *wp;
2031 int cpu_flags;
2033 if (cpu->watchpoint_hit) {
2034 /* We re-entered the check after replacing the TB. Now raise
2035 * the debug interrupt so that is will trigger after the
2036 * current instruction. */
2037 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2038 return;
2040 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2041 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2042 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2043 && (wp->flags & flags)) {
2044 if (flags == BP_MEM_READ) {
2045 wp->flags |= BP_WATCHPOINT_HIT_READ;
2046 } else {
2047 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2049 wp->hitaddr = vaddr;
2050 wp->hitattrs = attrs;
2051 if (!cpu->watchpoint_hit) {
2052 cpu->watchpoint_hit = wp;
2053 tb_check_watchpoint(cpu);
2054 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2055 cpu->exception_index = EXCP_DEBUG;
2056 cpu_loop_exit(cpu);
2057 } else {
2058 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2059 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2060 cpu_resume_from_signal(cpu, NULL);
2063 } else {
2064 wp->flags &= ~BP_WATCHPOINT_HIT;
2069 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2070 so these check for a hit then pass through to the normal out-of-line
2071 phys routines. */
2072 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2073 unsigned size, MemTxAttrs attrs)
2075 MemTxResult res;
2076 uint64_t data;
2077 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2078 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2080 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2081 switch (size) {
2082 case 1:
2083 data = address_space_ldub(as, addr, attrs, &res);
2084 break;
2085 case 2:
2086 data = address_space_lduw(as, addr, attrs, &res);
2087 break;
2088 case 4:
2089 data = address_space_ldl(as, addr, attrs, &res);
2090 break;
2091 default: abort();
2093 *pdata = data;
2094 return res;
2097 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2098 uint64_t val, unsigned size,
2099 MemTxAttrs attrs)
2101 MemTxResult res;
2102 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2103 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2105 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2106 switch (size) {
2107 case 1:
2108 address_space_stb(as, addr, val, attrs, &res);
2109 break;
2110 case 2:
2111 address_space_stw(as, addr, val, attrs, &res);
2112 break;
2113 case 4:
2114 address_space_stl(as, addr, val, attrs, &res);
2115 break;
2116 default: abort();
2118 return res;
2121 static const MemoryRegionOps watch_mem_ops = {
2122 .read_with_attrs = watch_mem_read,
2123 .write_with_attrs = watch_mem_write,
2124 .endianness = DEVICE_NATIVE_ENDIAN,
2127 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2128 unsigned len, MemTxAttrs attrs)
2130 subpage_t *subpage = opaque;
2131 uint8_t buf[8];
2132 MemTxResult res;
2134 #if defined(DEBUG_SUBPAGE)
2135 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2136 subpage, len, addr);
2137 #endif
2138 res = address_space_read(subpage->as, addr + subpage->base,
2139 attrs, buf, len);
2140 if (res) {
2141 return res;
2143 switch (len) {
2144 case 1:
2145 *data = ldub_p(buf);
2146 return MEMTX_OK;
2147 case 2:
2148 *data = lduw_p(buf);
2149 return MEMTX_OK;
2150 case 4:
2151 *data = ldl_p(buf);
2152 return MEMTX_OK;
2153 case 8:
2154 *data = ldq_p(buf);
2155 return MEMTX_OK;
2156 default:
2157 abort();
2161 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2162 uint64_t value, unsigned len, MemTxAttrs attrs)
2164 subpage_t *subpage = opaque;
2165 uint8_t buf[8];
2167 #if defined(DEBUG_SUBPAGE)
2168 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2169 " value %"PRIx64"\n",
2170 __func__, subpage, len, addr, value);
2171 #endif
2172 switch (len) {
2173 case 1:
2174 stb_p(buf, value);
2175 break;
2176 case 2:
2177 stw_p(buf, value);
2178 break;
2179 case 4:
2180 stl_p(buf, value);
2181 break;
2182 case 8:
2183 stq_p(buf, value);
2184 break;
2185 default:
2186 abort();
2188 return address_space_write(subpage->as, addr + subpage->base,
2189 attrs, buf, len);
2192 static bool subpage_accepts(void *opaque, hwaddr addr,
2193 unsigned len, bool is_write)
2195 subpage_t *subpage = opaque;
2196 #if defined(DEBUG_SUBPAGE)
2197 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2198 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2199 #endif
2201 return address_space_access_valid(subpage->as, addr + subpage->base,
2202 len, is_write);
2205 static const MemoryRegionOps subpage_ops = {
2206 .read_with_attrs = subpage_read,
2207 .write_with_attrs = subpage_write,
2208 .impl.min_access_size = 1,
2209 .impl.max_access_size = 8,
2210 .valid.min_access_size = 1,
2211 .valid.max_access_size = 8,
2212 .valid.accepts = subpage_accepts,
2213 .endianness = DEVICE_NATIVE_ENDIAN,
2216 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2217 uint16_t section)
2219 int idx, eidx;
2221 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2222 return -1;
2223 idx = SUBPAGE_IDX(start);
2224 eidx = SUBPAGE_IDX(end);
2225 #if defined(DEBUG_SUBPAGE)
2226 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2227 __func__, mmio, start, end, idx, eidx, section);
2228 #endif
2229 for (; idx <= eidx; idx++) {
2230 mmio->sub_section[idx] = section;
2233 return 0;
2236 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2238 subpage_t *mmio;
2240 mmio = g_malloc0(sizeof(subpage_t));
2242 mmio->as = as;
2243 mmio->base = base;
2244 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2245 NULL, TARGET_PAGE_SIZE);
2246 mmio->iomem.subpage = true;
2247 #if defined(DEBUG_SUBPAGE)
2248 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2249 mmio, base, TARGET_PAGE_SIZE);
2250 #endif
2251 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2253 return mmio;
2256 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2257 MemoryRegion *mr)
2259 assert(as);
2260 MemoryRegionSection section = {
2261 .address_space = as,
2262 .mr = mr,
2263 .offset_within_address_space = 0,
2264 .offset_within_region = 0,
2265 .size = int128_2_64(),
2268 return phys_section_add(map, &section);
2271 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2273 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2274 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2275 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2276 MemoryRegionSection *sections = d->map.sections;
2278 return sections[index & ~TARGET_PAGE_MASK].mr;
2281 static void io_mem_init(void)
2283 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2284 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2285 NULL, UINT64_MAX);
2286 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2287 NULL, UINT64_MAX);
2288 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2289 NULL, UINT64_MAX);
2292 static void mem_begin(MemoryListener *listener)
2294 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2295 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2296 uint16_t n;
2298 n = dummy_section(&d->map, as, &io_mem_unassigned);
2299 assert(n == PHYS_SECTION_UNASSIGNED);
2300 n = dummy_section(&d->map, as, &io_mem_notdirty);
2301 assert(n == PHYS_SECTION_NOTDIRTY);
2302 n = dummy_section(&d->map, as, &io_mem_rom);
2303 assert(n == PHYS_SECTION_ROM);
2304 n = dummy_section(&d->map, as, &io_mem_watch);
2305 assert(n == PHYS_SECTION_WATCH);
2307 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2308 d->as = as;
2309 as->next_dispatch = d;
2312 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2314 phys_sections_free(&d->map);
2315 g_free(d);
2318 static void mem_commit(MemoryListener *listener)
2320 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2321 AddressSpaceDispatch *cur = as->dispatch;
2322 AddressSpaceDispatch *next = as->next_dispatch;
2324 phys_page_compact_all(next, next->map.nodes_nb);
2326 atomic_rcu_set(&as->dispatch, next);
2327 if (cur) {
2328 call_rcu(cur, address_space_dispatch_free, rcu);
2332 static void tcg_commit(MemoryListener *listener)
2334 CPUAddressSpace *cpuas;
2335 AddressSpaceDispatch *d;
2337 /* since each CPU stores ram addresses in its TLB cache, we must
2338 reset the modified entries */
2339 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2340 cpu_reloading_memory_map();
2341 /* The CPU and TLB are protected by the iothread lock.
2342 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2343 * may have split the RCU critical section.
2345 d = atomic_rcu_read(&cpuas->as->dispatch);
2346 cpuas->memory_dispatch = d;
2347 tlb_flush(cpuas->cpu, 1);
2350 void address_space_init_dispatch(AddressSpace *as)
2352 as->dispatch = NULL;
2353 as->dispatch_listener = (MemoryListener) {
2354 .begin = mem_begin,
2355 .commit = mem_commit,
2356 .region_add = mem_add,
2357 .region_nop = mem_add,
2358 .priority = 0,
2360 memory_listener_register(&as->dispatch_listener, as);
2363 void address_space_unregister(AddressSpace *as)
2365 memory_listener_unregister(&as->dispatch_listener);
2368 void address_space_destroy_dispatch(AddressSpace *as)
2370 AddressSpaceDispatch *d = as->dispatch;
2372 atomic_rcu_set(&as->dispatch, NULL);
2373 if (d) {
2374 call_rcu(d, address_space_dispatch_free, rcu);
2378 static void memory_map_init(void)
2380 system_memory = g_malloc(sizeof(*system_memory));
2382 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2383 address_space_init(&address_space_memory, system_memory, "memory");
2385 system_io = g_malloc(sizeof(*system_io));
2386 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2387 65536);
2388 address_space_init(&address_space_io, system_io, "I/O");
2391 MemoryRegion *get_system_memory(void)
2393 return system_memory;
2396 MemoryRegion *get_system_io(void)
2398 return system_io;
2401 #endif /* !defined(CONFIG_USER_ONLY) */
2403 /* physical memory access (slow version, mainly for debug) */
2404 #if defined(CONFIG_USER_ONLY)
2405 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2406 uint8_t *buf, int len, int is_write)
2408 int l, flags;
2409 target_ulong page;
2410 void * p;
2412 while (len > 0) {
2413 page = addr & TARGET_PAGE_MASK;
2414 l = (page + TARGET_PAGE_SIZE) - addr;
2415 if (l > len)
2416 l = len;
2417 flags = page_get_flags(page);
2418 if (!(flags & PAGE_VALID))
2419 return -1;
2420 if (is_write) {
2421 if (!(flags & PAGE_WRITE))
2422 return -1;
2423 /* XXX: this code should not depend on lock_user */
2424 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2425 return -1;
2426 memcpy(p, buf, l);
2427 unlock_user(p, addr, l);
2428 } else {
2429 if (!(flags & PAGE_READ))
2430 return -1;
2431 /* XXX: this code should not depend on lock_user */
2432 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2433 return -1;
2434 memcpy(buf, p, l);
2435 unlock_user(p, addr, 0);
2437 len -= l;
2438 buf += l;
2439 addr += l;
2441 return 0;
2444 #else
2446 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2447 hwaddr length)
2449 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2450 /* No early return if dirty_log_mask is or becomes 0, because
2451 * cpu_physical_memory_set_dirty_range will still call
2452 * xen_modified_memory.
2454 if (dirty_log_mask) {
2455 dirty_log_mask =
2456 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2458 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2459 tb_invalidate_phys_range(addr, addr + length);
2460 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2462 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2465 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2467 unsigned access_size_max = mr->ops->valid.max_access_size;
2469 /* Regions are assumed to support 1-4 byte accesses unless
2470 otherwise specified. */
2471 if (access_size_max == 0) {
2472 access_size_max = 4;
2475 /* Bound the maximum access by the alignment of the address. */
2476 if (!mr->ops->impl.unaligned) {
2477 unsigned align_size_max = addr & -addr;
2478 if (align_size_max != 0 && align_size_max < access_size_max) {
2479 access_size_max = align_size_max;
2483 /* Don't attempt accesses larger than the maximum. */
2484 if (l > access_size_max) {
2485 l = access_size_max;
2487 l = pow2floor(l);
2489 return l;
2492 static bool prepare_mmio_access(MemoryRegion *mr)
2494 bool unlocked = !qemu_mutex_iothread_locked();
2495 bool release_lock = false;
2497 if (unlocked && mr->global_locking) {
2498 qemu_mutex_lock_iothread();
2499 unlocked = false;
2500 release_lock = true;
2502 if (mr->flush_coalesced_mmio) {
2503 if (unlocked) {
2504 qemu_mutex_lock_iothread();
2506 qemu_flush_coalesced_mmio_buffer();
2507 if (unlocked) {
2508 qemu_mutex_unlock_iothread();
2512 return release_lock;
2515 /* Called within RCU critical section. */
2516 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2517 MemTxAttrs attrs,
2518 const uint8_t *buf,
2519 int len, hwaddr addr1,
2520 hwaddr l, MemoryRegion *mr)
2522 uint8_t *ptr;
2523 uint64_t val;
2524 MemTxResult result = MEMTX_OK;
2525 bool release_lock = false;
2527 for (;;) {
2528 if (!memory_access_is_direct(mr, true)) {
2529 release_lock |= prepare_mmio_access(mr);
2530 l = memory_access_size(mr, l, addr1);
2531 /* XXX: could force current_cpu to NULL to avoid
2532 potential bugs */
2533 switch (l) {
2534 case 8:
2535 /* 64 bit write access */
2536 val = ldq_p(buf);
2537 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2538 attrs);
2539 break;
2540 case 4:
2541 /* 32 bit write access */
2542 val = ldl_p(buf);
2543 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2544 attrs);
2545 break;
2546 case 2:
2547 /* 16 bit write access */
2548 val = lduw_p(buf);
2549 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2550 attrs);
2551 break;
2552 case 1:
2553 /* 8 bit write access */
2554 val = ldub_p(buf);
2555 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2556 attrs);
2557 break;
2558 default:
2559 abort();
2561 } else {
2562 addr1 += memory_region_get_ram_addr(mr);
2563 /* RAM case */
2564 ptr = qemu_get_ram_ptr(addr1);
2565 memcpy(ptr, buf, l);
2566 invalidate_and_set_dirty(mr, addr1, l);
2569 if (release_lock) {
2570 qemu_mutex_unlock_iothread();
2571 release_lock = false;
2574 len -= l;
2575 buf += l;
2576 addr += l;
2578 if (!len) {
2579 break;
2582 l = len;
2583 mr = address_space_translate(as, addr, &addr1, &l, true);
2586 return result;
2589 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2590 const uint8_t *buf, int len)
2592 hwaddr l;
2593 hwaddr addr1;
2594 MemoryRegion *mr;
2595 MemTxResult result = MEMTX_OK;
2597 if (len > 0) {
2598 rcu_read_lock();
2599 l = len;
2600 mr = address_space_translate(as, addr, &addr1, &l, true);
2601 result = address_space_write_continue(as, addr, attrs, buf, len,
2602 addr1, l, mr);
2603 rcu_read_unlock();
2606 return result;
2609 /* Called within RCU critical section. */
2610 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2611 MemTxAttrs attrs, uint8_t *buf,
2612 int len, hwaddr addr1, hwaddr l,
2613 MemoryRegion *mr)
2615 uint8_t *ptr;
2616 uint64_t val;
2617 MemTxResult result = MEMTX_OK;
2618 bool release_lock = false;
2620 for (;;) {
2621 if (!memory_access_is_direct(mr, false)) {
2622 /* I/O case */
2623 release_lock |= prepare_mmio_access(mr);
2624 l = memory_access_size(mr, l, addr1);
2625 switch (l) {
2626 case 8:
2627 /* 64 bit read access */
2628 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2629 attrs);
2630 stq_p(buf, val);
2631 break;
2632 case 4:
2633 /* 32 bit read access */
2634 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2635 attrs);
2636 stl_p(buf, val);
2637 break;
2638 case 2:
2639 /* 16 bit read access */
2640 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2641 attrs);
2642 stw_p(buf, val);
2643 break;
2644 case 1:
2645 /* 8 bit read access */
2646 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2647 attrs);
2648 stb_p(buf, val);
2649 break;
2650 default:
2651 abort();
2653 } else {
2654 /* RAM case */
2655 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2656 memcpy(buf, ptr, l);
2659 if (release_lock) {
2660 qemu_mutex_unlock_iothread();
2661 release_lock = false;
2664 len -= l;
2665 buf += l;
2666 addr += l;
2668 if (!len) {
2669 break;
2672 l = len;
2673 mr = address_space_translate(as, addr, &addr1, &l, false);
2676 return result;
2679 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2680 MemTxAttrs attrs, uint8_t *buf, int len)
2682 hwaddr l;
2683 hwaddr addr1;
2684 MemoryRegion *mr;
2685 MemTxResult result = MEMTX_OK;
2687 if (len > 0) {
2688 rcu_read_lock();
2689 l = len;
2690 mr = address_space_translate(as, addr, &addr1, &l, false);
2691 result = address_space_read_continue(as, addr, attrs, buf, len,
2692 addr1, l, mr);
2693 rcu_read_unlock();
2696 return result;
2699 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2700 uint8_t *buf, int len, bool is_write)
2702 if (is_write) {
2703 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2704 } else {
2705 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2709 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2710 int len, int is_write)
2712 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2713 buf, len, is_write);
2716 enum write_rom_type {
2717 WRITE_DATA,
2718 FLUSH_CACHE,
2721 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2722 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2724 hwaddr l;
2725 uint8_t *ptr;
2726 hwaddr addr1;
2727 MemoryRegion *mr;
2729 rcu_read_lock();
2730 while (len > 0) {
2731 l = len;
2732 mr = address_space_translate(as, addr, &addr1, &l, true);
2734 if (!(memory_region_is_ram(mr) ||
2735 memory_region_is_romd(mr))) {
2736 l = memory_access_size(mr, l, addr1);
2737 } else {
2738 addr1 += memory_region_get_ram_addr(mr);
2739 /* ROM/RAM case */
2740 ptr = qemu_get_ram_ptr(addr1);
2741 switch (type) {
2742 case WRITE_DATA:
2743 memcpy(ptr, buf, l);
2744 invalidate_and_set_dirty(mr, addr1, l);
2745 break;
2746 case FLUSH_CACHE:
2747 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2748 break;
2751 len -= l;
2752 buf += l;
2753 addr += l;
2755 rcu_read_unlock();
2758 /* used for ROM loading : can write in RAM and ROM */
2759 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2760 const uint8_t *buf, int len)
2762 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2765 void cpu_flush_icache_range(hwaddr start, int len)
2768 * This function should do the same thing as an icache flush that was
2769 * triggered from within the guest. For TCG we are always cache coherent,
2770 * so there is no need to flush anything. For KVM / Xen we need to flush
2771 * the host's instruction cache at least.
2773 if (tcg_enabled()) {
2774 return;
2777 cpu_physical_memory_write_rom_internal(&address_space_memory,
2778 start, NULL, len, FLUSH_CACHE);
2781 typedef struct {
2782 MemoryRegion *mr;
2783 void *buffer;
2784 hwaddr addr;
2785 hwaddr len;
2786 bool in_use;
2787 } BounceBuffer;
2789 static BounceBuffer bounce;
2791 typedef struct MapClient {
2792 QEMUBH *bh;
2793 QLIST_ENTRY(MapClient) link;
2794 } MapClient;
2796 QemuMutex map_client_list_lock;
2797 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2798 = QLIST_HEAD_INITIALIZER(map_client_list);
2800 static void cpu_unregister_map_client_do(MapClient *client)
2802 QLIST_REMOVE(client, link);
2803 g_free(client);
2806 static void cpu_notify_map_clients_locked(void)
2808 MapClient *client;
2810 while (!QLIST_EMPTY(&map_client_list)) {
2811 client = QLIST_FIRST(&map_client_list);
2812 qemu_bh_schedule(client->bh);
2813 cpu_unregister_map_client_do(client);
2817 void cpu_register_map_client(QEMUBH *bh)
2819 MapClient *client = g_malloc(sizeof(*client));
2821 qemu_mutex_lock(&map_client_list_lock);
2822 client->bh = bh;
2823 QLIST_INSERT_HEAD(&map_client_list, client, link);
2824 if (!atomic_read(&bounce.in_use)) {
2825 cpu_notify_map_clients_locked();
2827 qemu_mutex_unlock(&map_client_list_lock);
2830 void cpu_exec_init_all(void)
2832 qemu_mutex_init(&ram_list.mutex);
2833 io_mem_init();
2834 memory_map_init();
2835 qemu_mutex_init(&map_client_list_lock);
2838 void cpu_unregister_map_client(QEMUBH *bh)
2840 MapClient *client;
2842 qemu_mutex_lock(&map_client_list_lock);
2843 QLIST_FOREACH(client, &map_client_list, link) {
2844 if (client->bh == bh) {
2845 cpu_unregister_map_client_do(client);
2846 break;
2849 qemu_mutex_unlock(&map_client_list_lock);
2852 static void cpu_notify_map_clients(void)
2854 qemu_mutex_lock(&map_client_list_lock);
2855 cpu_notify_map_clients_locked();
2856 qemu_mutex_unlock(&map_client_list_lock);
2859 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2861 MemoryRegion *mr;
2862 hwaddr l, xlat;
2864 rcu_read_lock();
2865 while (len > 0) {
2866 l = len;
2867 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2868 if (!memory_access_is_direct(mr, is_write)) {
2869 l = memory_access_size(mr, l, addr);
2870 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2871 return false;
2875 len -= l;
2876 addr += l;
2878 rcu_read_unlock();
2879 return true;
2882 /* Map a physical memory region into a host virtual address.
2883 * May map a subset of the requested range, given by and returned in *plen.
2884 * May return NULL if resources needed to perform the mapping are exhausted.
2885 * Use only for reads OR writes - not for read-modify-write operations.
2886 * Use cpu_register_map_client() to know when retrying the map operation is
2887 * likely to succeed.
2889 void *address_space_map(AddressSpace *as,
2890 hwaddr addr,
2891 hwaddr *plen,
2892 bool is_write)
2894 hwaddr len = *plen;
2895 hwaddr done = 0;
2896 hwaddr l, xlat, base;
2897 MemoryRegion *mr, *this_mr;
2898 ram_addr_t raddr;
2899 void *ptr;
2901 if (len == 0) {
2902 return NULL;
2905 l = len;
2906 rcu_read_lock();
2907 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2909 if (!memory_access_is_direct(mr, is_write)) {
2910 if (atomic_xchg(&bounce.in_use, true)) {
2911 rcu_read_unlock();
2912 return NULL;
2914 /* Avoid unbounded allocations */
2915 l = MIN(l, TARGET_PAGE_SIZE);
2916 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2917 bounce.addr = addr;
2918 bounce.len = l;
2920 memory_region_ref(mr);
2921 bounce.mr = mr;
2922 if (!is_write) {
2923 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2924 bounce.buffer, l);
2927 rcu_read_unlock();
2928 *plen = l;
2929 return bounce.buffer;
2932 base = xlat;
2933 raddr = memory_region_get_ram_addr(mr);
2935 for (;;) {
2936 len -= l;
2937 addr += l;
2938 done += l;
2939 if (len == 0) {
2940 break;
2943 l = len;
2944 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2945 if (this_mr != mr || xlat != base + done) {
2946 break;
2950 memory_region_ref(mr);
2951 *plen = done;
2952 ptr = qemu_ram_ptr_length(raddr + base, plen);
2953 rcu_read_unlock();
2955 return ptr;
2958 /* Unmaps a memory region previously mapped by address_space_map().
2959 * Will also mark the memory as dirty if is_write == 1. access_len gives
2960 * the amount of memory that was actually read or written by the caller.
2962 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2963 int is_write, hwaddr access_len)
2965 if (buffer != bounce.buffer) {
2966 MemoryRegion *mr;
2967 ram_addr_t addr1;
2969 mr = qemu_ram_addr_from_host(buffer, &addr1);
2970 assert(mr != NULL);
2971 if (is_write) {
2972 invalidate_and_set_dirty(mr, addr1, access_len);
2974 if (xen_enabled()) {
2975 xen_invalidate_map_cache_entry(buffer);
2977 memory_region_unref(mr);
2978 return;
2980 if (is_write) {
2981 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2982 bounce.buffer, access_len);
2984 qemu_vfree(bounce.buffer);
2985 bounce.buffer = NULL;
2986 memory_region_unref(bounce.mr);
2987 atomic_mb_set(&bounce.in_use, false);
2988 cpu_notify_map_clients();
2991 void *cpu_physical_memory_map(hwaddr addr,
2992 hwaddr *plen,
2993 int is_write)
2995 return address_space_map(&address_space_memory, addr, plen, is_write);
2998 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2999 int is_write, hwaddr access_len)
3001 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3004 /* warning: addr must be aligned */
3005 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3006 MemTxAttrs attrs,
3007 MemTxResult *result,
3008 enum device_endian endian)
3010 uint8_t *ptr;
3011 uint64_t val;
3012 MemoryRegion *mr;
3013 hwaddr l = 4;
3014 hwaddr addr1;
3015 MemTxResult r;
3016 bool release_lock = false;
3018 rcu_read_lock();
3019 mr = address_space_translate(as, addr, &addr1, &l, false);
3020 if (l < 4 || !memory_access_is_direct(mr, false)) {
3021 release_lock |= prepare_mmio_access(mr);
3023 /* I/O case */
3024 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3025 #if defined(TARGET_WORDS_BIGENDIAN)
3026 if (endian == DEVICE_LITTLE_ENDIAN) {
3027 val = bswap32(val);
3029 #else
3030 if (endian == DEVICE_BIG_ENDIAN) {
3031 val = bswap32(val);
3033 #endif
3034 } else {
3035 /* RAM case */
3036 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3037 & TARGET_PAGE_MASK)
3038 + addr1);
3039 switch (endian) {
3040 case DEVICE_LITTLE_ENDIAN:
3041 val = ldl_le_p(ptr);
3042 break;
3043 case DEVICE_BIG_ENDIAN:
3044 val = ldl_be_p(ptr);
3045 break;
3046 default:
3047 val = ldl_p(ptr);
3048 break;
3050 r = MEMTX_OK;
3052 if (result) {
3053 *result = r;
3055 if (release_lock) {
3056 qemu_mutex_unlock_iothread();
3058 rcu_read_unlock();
3059 return val;
3062 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3063 MemTxAttrs attrs, MemTxResult *result)
3065 return address_space_ldl_internal(as, addr, attrs, result,
3066 DEVICE_NATIVE_ENDIAN);
3069 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3070 MemTxAttrs attrs, MemTxResult *result)
3072 return address_space_ldl_internal(as, addr, attrs, result,
3073 DEVICE_LITTLE_ENDIAN);
3076 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3077 MemTxAttrs attrs, MemTxResult *result)
3079 return address_space_ldl_internal(as, addr, attrs, result,
3080 DEVICE_BIG_ENDIAN);
3083 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3085 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3088 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3090 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3093 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3095 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3098 /* warning: addr must be aligned */
3099 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3100 MemTxAttrs attrs,
3101 MemTxResult *result,
3102 enum device_endian endian)
3104 uint8_t *ptr;
3105 uint64_t val;
3106 MemoryRegion *mr;
3107 hwaddr l = 8;
3108 hwaddr addr1;
3109 MemTxResult r;
3110 bool release_lock = false;
3112 rcu_read_lock();
3113 mr = address_space_translate(as, addr, &addr1, &l,
3114 false);
3115 if (l < 8 || !memory_access_is_direct(mr, false)) {
3116 release_lock |= prepare_mmio_access(mr);
3118 /* I/O case */
3119 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3120 #if defined(TARGET_WORDS_BIGENDIAN)
3121 if (endian == DEVICE_LITTLE_ENDIAN) {
3122 val = bswap64(val);
3124 #else
3125 if (endian == DEVICE_BIG_ENDIAN) {
3126 val = bswap64(val);
3128 #endif
3129 } else {
3130 /* RAM case */
3131 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3132 & TARGET_PAGE_MASK)
3133 + addr1);
3134 switch (endian) {
3135 case DEVICE_LITTLE_ENDIAN:
3136 val = ldq_le_p(ptr);
3137 break;
3138 case DEVICE_BIG_ENDIAN:
3139 val = ldq_be_p(ptr);
3140 break;
3141 default:
3142 val = ldq_p(ptr);
3143 break;
3145 r = MEMTX_OK;
3147 if (result) {
3148 *result = r;
3150 if (release_lock) {
3151 qemu_mutex_unlock_iothread();
3153 rcu_read_unlock();
3154 return val;
3157 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3158 MemTxAttrs attrs, MemTxResult *result)
3160 return address_space_ldq_internal(as, addr, attrs, result,
3161 DEVICE_NATIVE_ENDIAN);
3164 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3165 MemTxAttrs attrs, MemTxResult *result)
3167 return address_space_ldq_internal(as, addr, attrs, result,
3168 DEVICE_LITTLE_ENDIAN);
3171 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3172 MemTxAttrs attrs, MemTxResult *result)
3174 return address_space_ldq_internal(as, addr, attrs, result,
3175 DEVICE_BIG_ENDIAN);
3178 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3180 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3183 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3185 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3188 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3190 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3193 /* XXX: optimize */
3194 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3195 MemTxAttrs attrs, MemTxResult *result)
3197 uint8_t val;
3198 MemTxResult r;
3200 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3201 if (result) {
3202 *result = r;
3204 return val;
3207 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3209 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3212 /* warning: addr must be aligned */
3213 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3214 hwaddr addr,
3215 MemTxAttrs attrs,
3216 MemTxResult *result,
3217 enum device_endian endian)
3219 uint8_t *ptr;
3220 uint64_t val;
3221 MemoryRegion *mr;
3222 hwaddr l = 2;
3223 hwaddr addr1;
3224 MemTxResult r;
3225 bool release_lock = false;
3227 rcu_read_lock();
3228 mr = address_space_translate(as, addr, &addr1, &l,
3229 false);
3230 if (l < 2 || !memory_access_is_direct(mr, false)) {
3231 release_lock |= prepare_mmio_access(mr);
3233 /* I/O case */
3234 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3235 #if defined(TARGET_WORDS_BIGENDIAN)
3236 if (endian == DEVICE_LITTLE_ENDIAN) {
3237 val = bswap16(val);
3239 #else
3240 if (endian == DEVICE_BIG_ENDIAN) {
3241 val = bswap16(val);
3243 #endif
3244 } else {
3245 /* RAM case */
3246 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3247 & TARGET_PAGE_MASK)
3248 + addr1);
3249 switch (endian) {
3250 case DEVICE_LITTLE_ENDIAN:
3251 val = lduw_le_p(ptr);
3252 break;
3253 case DEVICE_BIG_ENDIAN:
3254 val = lduw_be_p(ptr);
3255 break;
3256 default:
3257 val = lduw_p(ptr);
3258 break;
3260 r = MEMTX_OK;
3262 if (result) {
3263 *result = r;
3265 if (release_lock) {
3266 qemu_mutex_unlock_iothread();
3268 rcu_read_unlock();
3269 return val;
3272 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3273 MemTxAttrs attrs, MemTxResult *result)
3275 return address_space_lduw_internal(as, addr, attrs, result,
3276 DEVICE_NATIVE_ENDIAN);
3279 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3280 MemTxAttrs attrs, MemTxResult *result)
3282 return address_space_lduw_internal(as, addr, attrs, result,
3283 DEVICE_LITTLE_ENDIAN);
3286 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3287 MemTxAttrs attrs, MemTxResult *result)
3289 return address_space_lduw_internal(as, addr, attrs, result,
3290 DEVICE_BIG_ENDIAN);
3293 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3295 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3298 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3300 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3303 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3305 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3308 /* warning: addr must be aligned. The ram page is not masked as dirty
3309 and the code inside is not invalidated. It is useful if the dirty
3310 bits are used to track modified PTEs */
3311 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3312 MemTxAttrs attrs, MemTxResult *result)
3314 uint8_t *ptr;
3315 MemoryRegion *mr;
3316 hwaddr l = 4;
3317 hwaddr addr1;
3318 MemTxResult r;
3319 uint8_t dirty_log_mask;
3320 bool release_lock = false;
3322 rcu_read_lock();
3323 mr = address_space_translate(as, addr, &addr1, &l,
3324 true);
3325 if (l < 4 || !memory_access_is_direct(mr, true)) {
3326 release_lock |= prepare_mmio_access(mr);
3328 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3329 } else {
3330 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3331 ptr = qemu_get_ram_ptr(addr1);
3332 stl_p(ptr, val);
3334 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3335 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3336 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3337 r = MEMTX_OK;
3339 if (result) {
3340 *result = r;
3342 if (release_lock) {
3343 qemu_mutex_unlock_iothread();
3345 rcu_read_unlock();
3348 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3350 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3353 /* warning: addr must be aligned */
3354 static inline void address_space_stl_internal(AddressSpace *as,
3355 hwaddr addr, uint32_t val,
3356 MemTxAttrs attrs,
3357 MemTxResult *result,
3358 enum device_endian endian)
3360 uint8_t *ptr;
3361 MemoryRegion *mr;
3362 hwaddr l = 4;
3363 hwaddr addr1;
3364 MemTxResult r;
3365 bool release_lock = false;
3367 rcu_read_lock();
3368 mr = address_space_translate(as, addr, &addr1, &l,
3369 true);
3370 if (l < 4 || !memory_access_is_direct(mr, true)) {
3371 release_lock |= prepare_mmio_access(mr);
3373 #if defined(TARGET_WORDS_BIGENDIAN)
3374 if (endian == DEVICE_LITTLE_ENDIAN) {
3375 val = bswap32(val);
3377 #else
3378 if (endian == DEVICE_BIG_ENDIAN) {
3379 val = bswap32(val);
3381 #endif
3382 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3383 } else {
3384 /* RAM case */
3385 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3386 ptr = qemu_get_ram_ptr(addr1);
3387 switch (endian) {
3388 case DEVICE_LITTLE_ENDIAN:
3389 stl_le_p(ptr, val);
3390 break;
3391 case DEVICE_BIG_ENDIAN:
3392 stl_be_p(ptr, val);
3393 break;
3394 default:
3395 stl_p(ptr, val);
3396 break;
3398 invalidate_and_set_dirty(mr, addr1, 4);
3399 r = MEMTX_OK;
3401 if (result) {
3402 *result = r;
3404 if (release_lock) {
3405 qemu_mutex_unlock_iothread();
3407 rcu_read_unlock();
3410 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3411 MemTxAttrs attrs, MemTxResult *result)
3413 address_space_stl_internal(as, addr, val, attrs, result,
3414 DEVICE_NATIVE_ENDIAN);
3417 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3418 MemTxAttrs attrs, MemTxResult *result)
3420 address_space_stl_internal(as, addr, val, attrs, result,
3421 DEVICE_LITTLE_ENDIAN);
3424 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3425 MemTxAttrs attrs, MemTxResult *result)
3427 address_space_stl_internal(as, addr, val, attrs, result,
3428 DEVICE_BIG_ENDIAN);
3431 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3433 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3436 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3438 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3441 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3443 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3446 /* XXX: optimize */
3447 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3448 MemTxAttrs attrs, MemTxResult *result)
3450 uint8_t v = val;
3451 MemTxResult r;
3453 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3454 if (result) {
3455 *result = r;
3459 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3461 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3464 /* warning: addr must be aligned */
3465 static inline void address_space_stw_internal(AddressSpace *as,
3466 hwaddr addr, uint32_t val,
3467 MemTxAttrs attrs,
3468 MemTxResult *result,
3469 enum device_endian endian)
3471 uint8_t *ptr;
3472 MemoryRegion *mr;
3473 hwaddr l = 2;
3474 hwaddr addr1;
3475 MemTxResult r;
3476 bool release_lock = false;
3478 rcu_read_lock();
3479 mr = address_space_translate(as, addr, &addr1, &l, true);
3480 if (l < 2 || !memory_access_is_direct(mr, true)) {
3481 release_lock |= prepare_mmio_access(mr);
3483 #if defined(TARGET_WORDS_BIGENDIAN)
3484 if (endian == DEVICE_LITTLE_ENDIAN) {
3485 val = bswap16(val);
3487 #else
3488 if (endian == DEVICE_BIG_ENDIAN) {
3489 val = bswap16(val);
3491 #endif
3492 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3493 } else {
3494 /* RAM case */
3495 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3496 ptr = qemu_get_ram_ptr(addr1);
3497 switch (endian) {
3498 case DEVICE_LITTLE_ENDIAN:
3499 stw_le_p(ptr, val);
3500 break;
3501 case DEVICE_BIG_ENDIAN:
3502 stw_be_p(ptr, val);
3503 break;
3504 default:
3505 stw_p(ptr, val);
3506 break;
3508 invalidate_and_set_dirty(mr, addr1, 2);
3509 r = MEMTX_OK;
3511 if (result) {
3512 *result = r;
3514 if (release_lock) {
3515 qemu_mutex_unlock_iothread();
3517 rcu_read_unlock();
3520 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3521 MemTxAttrs attrs, MemTxResult *result)
3523 address_space_stw_internal(as, addr, val, attrs, result,
3524 DEVICE_NATIVE_ENDIAN);
3527 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3528 MemTxAttrs attrs, MemTxResult *result)
3530 address_space_stw_internal(as, addr, val, attrs, result,
3531 DEVICE_LITTLE_ENDIAN);
3534 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3535 MemTxAttrs attrs, MemTxResult *result)
3537 address_space_stw_internal(as, addr, val, attrs, result,
3538 DEVICE_BIG_ENDIAN);
3541 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3543 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3546 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3548 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3551 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3553 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3556 /* XXX: optimize */
3557 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3558 MemTxAttrs attrs, MemTxResult *result)
3560 MemTxResult r;
3561 val = tswap64(val);
3562 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3563 if (result) {
3564 *result = r;
3568 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3569 MemTxAttrs attrs, MemTxResult *result)
3571 MemTxResult r;
3572 val = cpu_to_le64(val);
3573 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3574 if (result) {
3575 *result = r;
3578 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3579 MemTxAttrs attrs, MemTxResult *result)
3581 MemTxResult r;
3582 val = cpu_to_be64(val);
3583 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3584 if (result) {
3585 *result = r;
3589 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3591 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3594 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3596 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3599 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3601 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3604 /* virtual memory access for debug (includes writing to ROM) */
3605 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3606 uint8_t *buf, int len, int is_write)
3608 int l;
3609 hwaddr phys_addr;
3610 target_ulong page;
3612 while (len > 0) {
3613 int asidx;
3614 MemTxAttrs attrs;
3616 page = addr & TARGET_PAGE_MASK;
3617 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3618 asidx = cpu_asidx_from_attrs(cpu, attrs);
3619 /* if no physical page mapped, return an error */
3620 if (phys_addr == -1)
3621 return -1;
3622 l = (page + TARGET_PAGE_SIZE) - addr;
3623 if (l > len)
3624 l = len;
3625 phys_addr += (addr & ~TARGET_PAGE_MASK);
3626 if (is_write) {
3627 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3628 phys_addr, buf, l);
3629 } else {
3630 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3631 MEMTXATTRS_UNSPECIFIED,
3632 buf, l, 0);
3634 len -= l;
3635 buf += l;
3636 addr += l;
3638 return 0;
3642 * Allows code that needs to deal with migration bitmaps etc to still be built
3643 * target independent.
3645 size_t qemu_target_page_bits(void)
3647 return TARGET_PAGE_BITS;
3650 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3666 #ifndef CONFIG_USER_ONLY
3667 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3669 MemoryRegion*mr;
3670 hwaddr l = 1;
3671 bool res;
3673 rcu_read_lock();
3674 mr = address_space_translate(&address_space_memory,
3675 phys_addr, &phys_addr, &l, false);
3677 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3678 rcu_read_unlock();
3679 return res;
3682 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3684 RAMBlock *block;
3685 int ret = 0;
3687 rcu_read_lock();
3688 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3689 ret = func(block->idstr, block->host, block->offset,
3690 block->used_length, opaque);
3691 if (ret) {
3692 break;
3695 rcu_read_unlock();
3696 return ret;
3698 #endif