/* Page-header text captured with this file:
 *   "block: fix deadlock in bdrv_co_flush"
 *   [qemu/kevin.git] / exec.c
 *   blob 8ffde75983e082aa4206cd0417104ff02fffb736
 */
/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include "qemu.h"
40 #else /* !CONFIG_USER_ONLY */
41 #include "hw/hw.h"
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57 #include "exec/log.h"
59 #include "migration/vmstate.h"
61 #include "qemu/range.h"
62 #ifndef _WIN32
63 #include "qemu/mmap-alloc.h"
64 #endif
66 //#define DEBUG_SUBPAGE
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock.
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots.
92 #define RAM_RESIZEABLE (1 << 2)
94 #endif
96 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
97 /* current CPU in the current thread. It is only valid inside
98 cpu_exec() */
99 __thread CPUState *current_cpu;
100 /* 0 = Do not count executed instructions.
101 1 = Precise instruction counting.
102 2 = Adaptive rate instruction counting. */
103 int use_icount;
105 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the multi-level physical page map (6 + 26 bit-field bits). */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

/* All-ones value of the 26-bit ptr field; marks "no node allocated". */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

/* Number of levels needed to cover the page-number bits of the address. */
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

/* One table of the map: P_L2_SIZE entries. */
typedef PhysPageEntry Node[P_L2_SIZE];
128 typedef struct PhysPageMap {
129 struct rcu_head rcu;
131 unsigned sections_nb;
132 unsigned sections_nb_alloc;
133 unsigned nodes_nb;
134 unsigned nodes_nb_alloc;
135 Node *nodes;
136 MemoryRegionSection *sections;
137 } PhysPageMap;
139 struct AddressSpaceDispatch {
140 struct rcu_head rcu;
142 MemoryRegionSection *mru_section;
143 /* This is a multi-level map on the physical address space.
144 * The bottom level has pointers to MemoryRegionSections.
146 PhysPageEntry phys_map;
147 PhysPageMap map;
148 AddressSpace *as;
151 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
152 typedef struct subpage_t {
153 MemoryRegion iomem;
154 AddressSpace *as;
155 hwaddr base;
156 uint16_t sub_section[TARGET_PAGE_SIZE];
157 } subpage_t;
159 #define PHYS_SECTION_UNASSIGNED 0
160 #define PHYS_SECTION_NOTDIRTY 1
161 #define PHYS_SECTION_ROM 2
162 #define PHYS_SECTION_WATCH 3
164 static void io_mem_init(void);
165 static void memory_map_init(void);
166 static void tcg_commit(MemoryListener *listener);
168 static MemoryRegion io_mem_watch;
171 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
172 * @cpu: the CPU whose AddressSpace this is
173 * @as: the AddressSpace itself
174 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
175 * @tcg_as_listener: listener for tracking changes to the AddressSpace
177 struct CPUAddressSpace {
178 CPUState *cpu;
179 AddressSpace *as;
180 struct AddressSpaceDispatch *memory_dispatch;
181 MemoryListener tcg_as_listener;
184 #endif
186 #if !defined(CONFIG_USER_ONLY)
188 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
190 static unsigned alloc_hint = 16;
191 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
192 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
193 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
194 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
195 alloc_hint = map->nodes_nb_alloc;
199 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
201 unsigned i;
202 uint32_t ret;
203 PhysPageEntry e;
204 PhysPageEntry *p;
206 ret = map->nodes_nb++;
207 p = map->nodes[ret];
208 assert(ret != PHYS_MAP_NODE_NIL);
209 assert(ret != map->nodes_nb_alloc);
211 e.skip = leaf ? 0 : 1;
212 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
213 for (i = 0; i < P_L2_SIZE; ++i) {
214 memcpy(&p[i], &e, sizeof(e));
216 return ret;
219 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
220 hwaddr *index, hwaddr *nb, uint16_t leaf,
221 int level)
223 PhysPageEntry *p;
224 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
226 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
227 lp->ptr = phys_map_node_alloc(map, level == 0);
229 p = map->nodes[lp->ptr];
230 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
232 while (*nb && lp < &p[P_L2_SIZE]) {
233 if ((*index & (step - 1)) == 0 && *nb >= step) {
234 lp->skip = 0;
235 lp->ptr = leaf;
236 *index += step;
237 *nb -= step;
238 } else {
239 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
241 ++lp;
245 static void phys_page_set(AddressSpaceDispatch *d,
246 hwaddr index, hwaddr nb,
247 uint16_t leaf)
249 /* Wildly overreserve - it doesn't matter much. */
250 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
252 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
255 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
256 * and update our entry so we can skip it and go directly to the destination.
258 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
260 unsigned valid_ptr = P_L2_SIZE;
261 int valid = 0;
262 PhysPageEntry *p;
263 int i;
265 if (lp->ptr == PHYS_MAP_NODE_NIL) {
266 return;
269 p = nodes[lp->ptr];
270 for (i = 0; i < P_L2_SIZE; i++) {
271 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
272 continue;
275 valid_ptr = i;
276 valid++;
277 if (p[i].skip) {
278 phys_page_compact(&p[i], nodes, compacted);
282 /* We can only compress if there's only one child. */
283 if (valid != 1) {
284 return;
287 assert(valid_ptr < P_L2_SIZE);
289 /* Don't compress if it won't fit in the # of bits we have. */
290 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
291 return;
294 lp->ptr = p[valid_ptr].ptr;
295 if (!p[valid_ptr].skip) {
296 /* If our only child is a leaf, make this a leaf. */
297 /* By design, we should have made this node a leaf to begin with so we
298 * should never reach here.
299 * But since it's so simple to handle this, let's do it just in case we
300 * change this rule.
302 lp->skip = 0;
303 } else {
304 lp->skip += p[valid_ptr].skip;
308 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
310 DECLARE_BITMAP(compacted, nodes_nb);
312 if (d->phys_map.skip) {
313 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
317 static inline bool section_covers_addr(const MemoryRegionSection *section,
318 hwaddr addr)
320 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
321 * the section must cover the entire address space.
323 return section->size.hi ||
324 range_covers_byte(section->offset_within_address_space,
325 section->size.lo, addr);
328 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
329 Node *nodes, MemoryRegionSection *sections)
331 PhysPageEntry *p;
332 hwaddr index = addr >> TARGET_PAGE_BITS;
333 int i;
335 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
336 if (lp.ptr == PHYS_MAP_NODE_NIL) {
337 return &sections[PHYS_SECTION_UNASSIGNED];
339 p = nodes[lp.ptr];
340 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
343 if (section_covers_addr(&sections[lp.ptr], addr)) {
344 return &sections[lp.ptr];
345 } else {
346 return &sections[PHYS_SECTION_UNASSIGNED];
350 bool memory_region_is_unassigned(MemoryRegion *mr)
352 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
353 && mr != &io_mem_watch;
356 /* Called from RCU critical section */
357 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
358 hwaddr addr,
359 bool resolve_subpage)
361 MemoryRegionSection *section = atomic_read(&d->mru_section);
362 subpage_t *subpage;
363 bool update;
365 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
366 section_covers_addr(section, addr)) {
367 update = false;
368 } else {
369 section = phys_page_find(d->phys_map, addr, d->map.nodes,
370 d->map.sections);
371 update = true;
373 if (resolve_subpage && section->mr->subpage) {
374 subpage = container_of(section->mr, subpage_t, iomem);
375 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
377 if (update) {
378 atomic_set(&d->mru_section, section);
380 return section;
383 /* Called from RCU critical section */
384 static MemoryRegionSection *
385 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
386 hwaddr *plen, bool resolve_subpage)
388 MemoryRegionSection *section;
389 MemoryRegion *mr;
390 Int128 diff;
392 section = address_space_lookup_region(d, addr, resolve_subpage);
393 /* Compute offset within MemoryRegionSection */
394 addr -= section->offset_within_address_space;
396 /* Compute offset within MemoryRegion */
397 *xlat = addr + section->offset_within_region;
399 mr = section->mr;
401 /* MMIO registers can be expected to perform full-width accesses based only
402 * on their address, without considering adjacent registers that could
403 * decode to completely different MemoryRegions. When such registers
404 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
405 * regions overlap wildly. For this reason we cannot clamp the accesses
406 * here.
408 * If the length is small (as is the case for address_space_ldl/stl),
409 * everything works fine. If the incoming length is large, however,
410 * the caller really has to do the clamping through memory_access_size.
412 if (memory_region_is_ram(mr)) {
413 diff = int128_sub(section->size, int128_make64(addr));
414 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
416 return section;
419 /* Called from RCU critical section */
420 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
421 hwaddr *xlat, hwaddr *plen,
422 bool is_write)
424 IOMMUTLBEntry iotlb;
425 MemoryRegionSection *section;
426 MemoryRegion *mr;
428 for (;;) {
429 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
430 section = address_space_translate_internal(d, addr, &addr, plen, true);
431 mr = section->mr;
433 if (!mr->iommu_ops) {
434 break;
437 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
438 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
439 | (addr & iotlb.addr_mask));
440 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
441 if (!(iotlb.perm & (1 << is_write))) {
442 mr = &io_mem_unassigned;
443 break;
446 as = iotlb.target_as;
449 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
450 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
451 *plen = MIN(page, *plen);
454 *xlat = addr;
455 return mr;
458 /* Called from RCU critical section */
459 MemoryRegionSection *
460 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
461 hwaddr *xlat, hwaddr *plen)
463 MemoryRegionSection *section;
464 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
466 section = address_space_translate_internal(d, addr, xlat, plen, false);
468 assert(!section->mr->iommu_ops);
469 return section;
471 #endif
473 #if !defined(CONFIG_USER_ONLY)
475 static int cpu_common_post_load(void *opaque, int version_id)
477 CPUState *cpu = opaque;
479 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
480 version_id is increased. */
481 cpu->interrupt_request &= ~0x01;
482 tlb_flush(cpu, 1);
484 return 0;
487 static int cpu_common_pre_load(void *opaque)
489 CPUState *cpu = opaque;
491 cpu->exception_index = -1;
493 return 0;
496 static bool cpu_common_exception_index_needed(void *opaque)
498 CPUState *cpu = opaque;
500 return tcg_enabled() && cpu->exception_index != -1;
503 static const VMStateDescription vmstate_cpu_common_exception_index = {
504 .name = "cpu_common/exception_index",
505 .version_id = 1,
506 .minimum_version_id = 1,
507 .needed = cpu_common_exception_index_needed,
508 .fields = (VMStateField[]) {
509 VMSTATE_INT32(exception_index, CPUState),
510 VMSTATE_END_OF_LIST()
514 static bool cpu_common_crash_occurred_needed(void *opaque)
516 CPUState *cpu = opaque;
518 return cpu->crash_occurred;
521 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
522 .name = "cpu_common/crash_occurred",
523 .version_id = 1,
524 .minimum_version_id = 1,
525 .needed = cpu_common_crash_occurred_needed,
526 .fields = (VMStateField[]) {
527 VMSTATE_BOOL(crash_occurred, CPUState),
528 VMSTATE_END_OF_LIST()
532 const VMStateDescription vmstate_cpu_common = {
533 .name = "cpu_common",
534 .version_id = 1,
535 .minimum_version_id = 1,
536 .pre_load = cpu_common_pre_load,
537 .post_load = cpu_common_post_load,
538 .fields = (VMStateField[]) {
539 VMSTATE_UINT32(halted, CPUState),
540 VMSTATE_UINT32(interrupt_request, CPUState),
541 VMSTATE_END_OF_LIST()
543 .subsections = (const VMStateDescription*[]) {
544 &vmstate_cpu_common_exception_index,
545 &vmstate_cpu_common_crash_occurred,
546 NULL
550 #endif
552 CPUState *qemu_get_cpu(int index)
554 CPUState *cpu;
556 CPU_FOREACH(cpu) {
557 if (cpu->cpu_index == index) {
558 return cpu;
562 return NULL;
565 #if !defined(CONFIG_USER_ONLY)
566 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
568 CPUAddressSpace *newas;
570 /* Target code should have set num_ases before calling us */
571 assert(asidx < cpu->num_ases);
573 if (asidx == 0) {
574 /* address space 0 gets the convenience alias */
575 cpu->as = as;
578 /* KVM cannot currently support multiple address spaces. */
579 assert(asidx == 0 || !kvm_enabled());
581 if (!cpu->cpu_ases) {
582 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
585 newas = &cpu->cpu_ases[asidx];
586 newas->cpu = cpu;
587 newas->as = as;
588 if (tcg_enabled()) {
589 newas->tcg_as_listener.commit = tcg_commit;
590 memory_listener_register(&newas->tcg_as_listener, as);
594 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
596 /* Return the AddressSpace corresponding to the specified index */
597 return cpu->cpu_ases[asidx].as;
599 #endif
601 static int cpu_get_free_index(void)
603 CPUState *some_cpu;
604 int cpu_index = 0;
606 CPU_FOREACH(some_cpu) {
607 cpu_index++;
609 return cpu_index;
612 void cpu_exec_exit(CPUState *cpu)
614 CPUClass *cc = CPU_GET_CLASS(cpu);
616 cpu_list_lock();
617 if (cpu->node.tqe_prev == NULL) {
618 /* there is nothing to undo since cpu_exec_init() hasn't been called */
619 cpu_list_unlock();
620 return;
623 QTAILQ_REMOVE(&cpus, cpu, node);
624 cpu->node.tqe_prev = NULL;
625 cpu->cpu_index = UNASSIGNED_CPU_INDEX;
626 cpu_list_unlock();
628 if (cc->vmsd != NULL) {
629 vmstate_unregister(NULL, cc->vmsd, cpu);
631 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
632 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
636 void cpu_exec_init(CPUState *cpu, Error **errp)
638 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
639 Error *local_err ATTRIBUTE_UNUSED = NULL;
641 cpu->as = NULL;
642 cpu->num_ases = 0;
644 #ifndef CONFIG_USER_ONLY
645 cpu->thread_id = qemu_get_thread_id();
647 /* This is a softmmu CPU object, so create a property for it
648 * so users can wire up its memory. (This can't go in qom/cpu.c
649 * because that file is compiled only once for both user-mode
650 * and system builds.) The default if no link is set up is to use
651 * the system address space.
653 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
654 (Object **)&cpu->memory,
655 qdev_prop_allow_set_link_before_realize,
656 OBJ_PROP_LINK_UNREF_ON_RELEASE,
657 &error_abort);
658 cpu->memory = system_memory;
659 object_ref(OBJECT(cpu->memory));
660 #endif
662 cpu_list_lock();
663 if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) {
664 cpu->cpu_index = cpu_get_free_index();
665 assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
667 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
668 cpu_list_unlock();
670 #ifndef CONFIG_USER_ONLY
671 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
672 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
674 if (cc->vmsd != NULL) {
675 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
677 #endif
680 #if defined(CONFIG_USER_ONLY)
681 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
683 tb_invalidate_phys_page_range(pc, pc + 1, 0);
685 #else
686 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
688 MemTxAttrs attrs;
689 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
690 int asidx = cpu_asidx_from_attrs(cpu, attrs);
691 if (phys != -1) {
692 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
693 phys | (pc & ~TARGET_PAGE_MASK));
696 #endif
698 #if defined(CONFIG_USER_ONLY)
699 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
704 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
705 int flags)
707 return -ENOSYS;
710 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
714 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
715 int flags, CPUWatchpoint **watchpoint)
717 return -ENOSYS;
719 #else
720 /* Add a watchpoint. */
721 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
722 int flags, CPUWatchpoint **watchpoint)
724 CPUWatchpoint *wp;
726 /* forbid ranges which are empty or run off the end of the address space */
727 if (len == 0 || (addr + len - 1) < addr) {
728 error_report("tried to set invalid watchpoint at %"
729 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
730 return -EINVAL;
732 wp = g_malloc(sizeof(*wp));
734 wp->vaddr = addr;
735 wp->len = len;
736 wp->flags = flags;
738 /* keep all GDB-injected watchpoints in front */
739 if (flags & BP_GDB) {
740 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
741 } else {
742 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
745 tlb_flush_page(cpu, addr);
747 if (watchpoint)
748 *watchpoint = wp;
749 return 0;
752 /* Remove a specific watchpoint. */
753 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
754 int flags)
756 CPUWatchpoint *wp;
758 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
759 if (addr == wp->vaddr && len == wp->len
760 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
761 cpu_watchpoint_remove_by_ref(cpu, wp);
762 return 0;
765 return -ENOENT;
768 /* Remove a specific watchpoint by reference. */
769 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
771 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
773 tlb_flush_page(cpu, watchpoint->vaddr);
775 g_free(watchpoint);
778 /* Remove all matching watchpoints. */
779 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
781 CPUWatchpoint *wp, *next;
783 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
784 if (wp->flags & mask) {
785 cpu_watchpoint_remove_by_ref(cpu, wp);
790 /* Return true if this watchpoint address matches the specified
791 * access (ie the address range covered by the watchpoint overlaps
792 * partially or completely with the address range covered by the
793 * access).
795 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
796 vaddr addr,
797 vaddr len)
799 /* We know the lengths are non-zero, but a little caution is
800 * required to avoid errors in the case where the range ends
801 * exactly at the top of the address space and so addr + len
802 * wraps round to zero.
804 vaddr wpend = wp->vaddr + wp->len - 1;
805 vaddr addrend = addr + len - 1;
807 return !(addr > wpend || wp->vaddr > addrend);
810 #endif
812 /* Add a breakpoint. */
813 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
814 CPUBreakpoint **breakpoint)
816 CPUBreakpoint *bp;
818 bp = g_malloc(sizeof(*bp));
820 bp->pc = pc;
821 bp->flags = flags;
823 /* keep all GDB-injected breakpoints in front */
824 if (flags & BP_GDB) {
825 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
826 } else {
827 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
830 breakpoint_invalidate(cpu, pc);
832 if (breakpoint) {
833 *breakpoint = bp;
835 return 0;
838 /* Remove a specific breakpoint. */
839 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
841 CPUBreakpoint *bp;
843 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
844 if (bp->pc == pc && bp->flags == flags) {
845 cpu_breakpoint_remove_by_ref(cpu, bp);
846 return 0;
849 return -ENOENT;
852 /* Remove a specific breakpoint by reference. */
853 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
855 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
857 breakpoint_invalidate(cpu, breakpoint->pc);
859 g_free(breakpoint);
862 /* Remove all matching breakpoints. */
863 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
865 CPUBreakpoint *bp, *next;
867 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
868 if (bp->flags & mask) {
869 cpu_breakpoint_remove_by_ref(cpu, bp);
874 /* enable or disable single step mode. EXCP_DEBUG is returned by the
875 CPU loop after each instruction */
876 void cpu_single_step(CPUState *cpu, int enabled)
878 if (cpu->singlestep_enabled != enabled) {
879 cpu->singlestep_enabled = enabled;
880 if (kvm_enabled()) {
881 kvm_update_guest_debug(cpu, 0);
882 } else {
883 /* must flush all the translated code to avoid inconsistencies */
884 /* XXX: only flush what is necessary */
885 tb_flush(cpu);
890 void cpu_abort(CPUState *cpu, const char *fmt, ...)
892 va_list ap;
893 va_list ap2;
895 va_start(ap, fmt);
896 va_copy(ap2, ap);
897 fprintf(stderr, "qemu: fatal: ");
898 vfprintf(stderr, fmt, ap);
899 fprintf(stderr, "\n");
900 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
901 if (qemu_log_separate()) {
902 qemu_log("qemu: fatal: ");
903 qemu_log_vprintf(fmt, ap2);
904 qemu_log("\n");
905 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
906 qemu_log_flush();
907 qemu_log_close();
909 va_end(ap2);
910 va_end(ap);
911 replay_finish();
912 #if defined(CONFIG_USER_ONLY)
914 struct sigaction act;
915 sigfillset(&act.sa_mask);
916 act.sa_handler = SIG_DFL;
917 sigaction(SIGABRT, &act, NULL);
919 #endif
920 abort();
923 #if !defined(CONFIG_USER_ONLY)
924 /* Called from RCU critical section */
925 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
927 RAMBlock *block;
929 block = atomic_rcu_read(&ram_list.mru_block);
930 if (block && addr - block->offset < block->max_length) {
931 return block;
933 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
934 if (addr - block->offset < block->max_length) {
935 goto found;
939 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
940 abort();
942 found:
943 /* It is safe to write mru_block outside the iothread lock. This
944 * is what happens:
946 * mru_block = xxx
947 * rcu_read_unlock()
948 * xxx removed from list
949 * rcu_read_lock()
950 * read mru_block
951 * mru_block = NULL;
952 * call_rcu(reclaim_ramblock, xxx);
953 * rcu_read_unlock()
955 * atomic_rcu_set is not needed here. The block was already published
956 * when it was placed into the list. Here we're just making an extra
957 * copy of the pointer.
959 ram_list.mru_block = block;
960 return block;
963 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
965 CPUState *cpu;
966 ram_addr_t start1;
967 RAMBlock *block;
968 ram_addr_t end;
970 end = TARGET_PAGE_ALIGN(start + length);
971 start &= TARGET_PAGE_MASK;
973 rcu_read_lock();
974 block = qemu_get_ram_block(start);
975 assert(block == qemu_get_ram_block(end - 1));
976 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
977 CPU_FOREACH(cpu) {
978 tlb_reset_dirty(cpu, start1, length);
980 rcu_read_unlock();
983 /* Note: start and end must be within the same ram block. */
984 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
985 ram_addr_t length,
986 unsigned client)
988 DirtyMemoryBlocks *blocks;
989 unsigned long end, page;
990 bool dirty = false;
992 if (length == 0) {
993 return false;
996 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
997 page = start >> TARGET_PAGE_BITS;
999 rcu_read_lock();
1001 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1003 while (page < end) {
1004 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1005 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1006 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1008 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1009 offset, num);
1010 page += num;
1013 rcu_read_unlock();
1015 if (dirty && tcg_enabled()) {
1016 tlb_reset_dirty_range_all(start, length);
1019 return dirty;
1022 /* Called from RCU critical section */
1023 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1024 MemoryRegionSection *section,
1025 target_ulong vaddr,
1026 hwaddr paddr, hwaddr xlat,
1027 int prot,
1028 target_ulong *address)
1030 hwaddr iotlb;
1031 CPUWatchpoint *wp;
1033 if (memory_region_is_ram(section->mr)) {
1034 /* Normal RAM. */
1035 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1036 if (!section->readonly) {
1037 iotlb |= PHYS_SECTION_NOTDIRTY;
1038 } else {
1039 iotlb |= PHYS_SECTION_ROM;
1041 } else {
1042 AddressSpaceDispatch *d;
1044 d = atomic_rcu_read(&section->address_space->dispatch);
1045 iotlb = section - d->map.sections;
1046 iotlb += xlat;
1049 /* Make accesses to pages with watchpoints go via the
1050 watchpoint trap routines. */
1051 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1052 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1053 /* Avoid trapping reads of pages with a write breakpoint. */
1054 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1055 iotlb = PHYS_SECTION_WATCH + paddr;
1056 *address |= TLB_MMIO;
1057 break;
1062 return iotlb;
1064 #endif /* defined(CONFIG_USER_ONLY) */
1066 #if !defined(CONFIG_USER_ONLY)
1068 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1069 uint16_t section);
1070 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1072 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1073 qemu_anon_ram_alloc;
1076 * Set a custom physical guest memory alloator.
1077 * Accelerators with unusual needs may need this. Hopefully, we can
1078 * get rid of it eventually.
1080 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1082 phys_mem_alloc = alloc;
1085 static uint16_t phys_section_add(PhysPageMap *map,
1086 MemoryRegionSection *section)
1088 /* The physical section number is ORed with a page-aligned
1089 * pointer to produce the iotlb entries. Thus it should
1090 * never overflow into the page-aligned value.
1092 assert(map->sections_nb < TARGET_PAGE_SIZE);
1094 if (map->sections_nb == map->sections_nb_alloc) {
1095 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1096 map->sections = g_renew(MemoryRegionSection, map->sections,
1097 map->sections_nb_alloc);
1099 map->sections[map->sections_nb] = *section;
1100 memory_region_ref(section->mr);
1101 return map->sections_nb++;
1104 static void phys_section_destroy(MemoryRegion *mr)
1106 bool have_sub_page = mr->subpage;
1108 memory_region_unref(mr);
1110 if (have_sub_page) {
1111 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1112 object_unref(OBJECT(&subpage->iomem));
1113 g_free(subpage);
1117 static void phys_sections_free(PhysPageMap *map)
1119 while (map->sections_nb > 0) {
1120 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1121 phys_section_destroy(section->mr);
1123 g_free(map->sections);
1124 g_free(map->nodes);
1127 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1129 subpage_t *subpage;
1130 hwaddr base = section->offset_within_address_space
1131 & TARGET_PAGE_MASK;
1132 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1133 d->map.nodes, d->map.sections);
1134 MemoryRegionSection subsection = {
1135 .offset_within_address_space = base,
1136 .size = int128_make64(TARGET_PAGE_SIZE),
1138 hwaddr start, end;
1140 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1142 if (!(existing->mr->subpage)) {
1143 subpage = subpage_init(d->as, base);
1144 subsection.address_space = d->as;
1145 subsection.mr = &subpage->iomem;
1146 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1147 phys_section_add(&d->map, &subsection));
1148 } else {
1149 subpage = container_of(existing->mr, subpage_t, iomem);
1151 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1152 end = start + int128_get64(section->size) - 1;
1153 subpage_register(subpage, start, end,
1154 phys_section_add(&d->map, section));
1158 static void register_multipage(AddressSpaceDispatch *d,
1159 MemoryRegionSection *section)
1161 hwaddr start_addr = section->offset_within_address_space;
1162 uint16_t section_index = phys_section_add(&d->map, section);
1163 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1164 TARGET_PAGE_BITS));
1166 assert(num_pages);
1167 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1170 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1172 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1173 AddressSpaceDispatch *d = as->next_dispatch;
1174 MemoryRegionSection now = *section, remain = *section;
1175 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1177 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1178 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1179 - now.offset_within_address_space;
1181 now.size = int128_min(int128_make64(left), now.size);
1182 register_subpage(d, &now);
1183 } else {
1184 now.size = int128_zero();
1186 while (int128_ne(remain.size, now.size)) {
1187 remain.size = int128_sub(remain.size, now.size);
1188 remain.offset_within_address_space += int128_get64(now.size);
1189 remain.offset_within_region += int128_get64(now.size);
1190 now = remain;
1191 if (int128_lt(remain.size, page_size)) {
1192 register_subpage(d, &now);
1193 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1194 now.size = page_size;
1195 register_subpage(d, &now);
1196 } else {
1197 now.size = int128_and(now.size, int128_neg(page_size));
1198 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1209 void qemu_mutex_lock_ramlist(void)
1211 qemu_mutex_lock(&ram_list.mutex);
1214 void qemu_mutex_unlock_ramlist(void)
1216 qemu_mutex_unlock(&ram_list.mutex);
1219 #ifdef __linux__
1220 static void *file_ram_alloc(RAMBlock *block,
1221 ram_addr_t memory,
1222 const char *path,
1223 Error **errp)
1225 bool unlink_on_error = false;
1226 char *filename;
1227 char *sanitized_name;
1228 char *c;
1229 void *area = MAP_FAILED;
1230 int fd = -1;
1231 int64_t page_size;
1233 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1234 error_setg(errp,
1235 "host lacks kvm mmu notifiers, -mem-path unsupported");
1236 return NULL;
1239 for (;;) {
1240 fd = open(path, O_RDWR);
1241 if (fd >= 0) {
1242 /* @path names an existing file, use it */
1243 break;
1245 if (errno == ENOENT) {
1246 /* @path names a file that doesn't exist, create it */
1247 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1248 if (fd >= 0) {
1249 unlink_on_error = true;
1250 break;
1252 } else if (errno == EISDIR) {
1253 /* @path names a directory, create a file there */
1254 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1255 sanitized_name = g_strdup(memory_region_name(block->mr));
1256 for (c = sanitized_name; *c != '\0'; c++) {
1257 if (*c == '/') {
1258 *c = '_';
1262 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1263 sanitized_name);
1264 g_free(sanitized_name);
1266 fd = mkstemp(filename);
1267 if (fd >= 0) {
1268 unlink(filename);
1269 g_free(filename);
1270 break;
1272 g_free(filename);
1274 if (errno != EEXIST && errno != EINTR) {
1275 error_setg_errno(errp, errno,
1276 "can't open backing store %s for guest RAM",
1277 path);
1278 goto error;
1281 * Try again on EINTR and EEXIST. The latter happens when
1282 * something else creates the file between our two open().
1286 page_size = qemu_fd_getpagesize(fd);
1287 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1289 if (memory < page_size) {
1290 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1291 "or larger than page size 0x%" PRIx64,
1292 memory, page_size);
1293 goto error;
1296 memory = ROUND_UP(memory, page_size);
1299 * ftruncate is not supported by hugetlbfs in older
1300 * hosts, so don't bother bailing out on errors.
1301 * If anything goes wrong with it under other filesystems,
1302 * mmap will fail.
1304 if (ftruncate(fd, memory)) {
1305 perror("ftruncate");
1308 area = qemu_ram_mmap(fd, memory, block->mr->align,
1309 block->flags & RAM_SHARED);
1310 if (area == MAP_FAILED) {
1311 error_setg_errno(errp, errno,
1312 "unable to map backing store for guest RAM");
1313 goto error;
1316 if (mem_prealloc) {
1317 os_mem_prealloc(fd, area, memory, errp);
1318 if (errp && *errp) {
1319 goto error;
1323 block->fd = fd;
1324 return area;
1326 error:
1327 if (area != MAP_FAILED) {
1328 qemu_ram_munmap(area, memory);
1330 if (unlink_on_error) {
1331 unlink(path);
1333 if (fd != -1) {
1334 close(fd);
1336 return NULL;
1338 #endif
1340 /* Called with the ramlist lock held. */
1341 static ram_addr_t find_ram_offset(ram_addr_t size)
1343 RAMBlock *block, *next_block;
1344 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1346 assert(size != 0); /* it would hand out same offset multiple times */
1348 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1349 return 0;
1352 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1353 ram_addr_t end, next = RAM_ADDR_MAX;
1355 end = block->offset + block->max_length;
1357 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1358 if (next_block->offset >= end) {
1359 next = MIN(next, next_block->offset);
1362 if (next - end >= size && next - end < mingap) {
1363 offset = end;
1364 mingap = next - end;
1368 if (offset == RAM_ADDR_MAX) {
1369 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1370 (uint64_t)size);
1371 abort();
1374 return offset;
1377 ram_addr_t last_ram_offset(void)
1379 RAMBlock *block;
1380 ram_addr_t last = 0;
1382 rcu_read_lock();
1383 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1384 last = MAX(last, block->offset + block->max_length);
1386 rcu_read_unlock();
1387 return last;
1390 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1392 int ret;
1394 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1395 if (!machine_dump_guest_core(current_machine)) {
1396 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1397 if (ret) {
1398 perror("qemu_madvise");
1399 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1400 "but dump_guest_core=off specified\n");
1405 const char *qemu_ram_get_idstr(RAMBlock *rb)
1407 return rb->idstr;
1410 /* Called with iothread lock held. */
1411 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1413 RAMBlock *block;
1415 assert(new_block);
1416 assert(!new_block->idstr[0]);
1418 if (dev) {
1419 char *id = qdev_get_dev_path(dev);
1420 if (id) {
1421 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1422 g_free(id);
1425 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1427 rcu_read_lock();
1428 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1429 if (block != new_block &&
1430 !strcmp(block->idstr, new_block->idstr)) {
1431 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1432 new_block->idstr);
1433 abort();
1436 rcu_read_unlock();
1439 /* Called with iothread lock held. */
1440 void qemu_ram_unset_idstr(RAMBlock *block)
1442 /* FIXME: arch_init.c assumes that this is not called throughout
1443 * migration. Ignore the problem since hot-unplug during migration
1444 * does not work anyway.
1446 if (block) {
1447 memset(block->idstr, 0, sizeof(block->idstr));
1451 static int memory_try_enable_merging(void *addr, size_t len)
1453 if (!machine_mem_merge(current_machine)) {
1454 /* disabled by the user */
1455 return 0;
1458 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1461 /* Only legal before guest might have detected the memory size: e.g. on
1462 * incoming migration, or right after reset.
1464 * As memory core doesn't know how is memory accessed, it is up to
1465 * resize callback to update device state and/or add assertions to detect
1466 * misuse, if necessary.
1468 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1470 assert(block);
1472 newsize = HOST_PAGE_ALIGN(newsize);
1474 if (block->used_length == newsize) {
1475 return 0;
1478 if (!(block->flags & RAM_RESIZEABLE)) {
1479 error_setg_errno(errp, EINVAL,
1480 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1481 " in != 0x" RAM_ADDR_FMT, block->idstr,
1482 newsize, block->used_length);
1483 return -EINVAL;
1486 if (block->max_length < newsize) {
1487 error_setg_errno(errp, EINVAL,
1488 "Length too large: %s: 0x" RAM_ADDR_FMT
1489 " > 0x" RAM_ADDR_FMT, block->idstr,
1490 newsize, block->max_length);
1491 return -EINVAL;
1494 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1495 block->used_length = newsize;
1496 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1497 DIRTY_CLIENTS_ALL);
1498 memory_region_set_size(block->mr, newsize);
1499 if (block->resized) {
1500 block->resized(block->idstr, newsize, block->host);
1502 return 0;
1505 /* Called with ram_list.mutex held */
1506 static void dirty_memory_extend(ram_addr_t old_ram_size,
1507 ram_addr_t new_ram_size)
1509 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1510 DIRTY_MEMORY_BLOCK_SIZE);
1511 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1512 DIRTY_MEMORY_BLOCK_SIZE);
1513 int i;
1515 /* Only need to extend if block count increased */
1516 if (new_num_blocks <= old_num_blocks) {
1517 return;
1520 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1521 DirtyMemoryBlocks *old_blocks;
1522 DirtyMemoryBlocks *new_blocks;
1523 int j;
1525 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1526 new_blocks = g_malloc(sizeof(*new_blocks) +
1527 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1529 if (old_num_blocks) {
1530 memcpy(new_blocks->blocks, old_blocks->blocks,
1531 old_num_blocks * sizeof(old_blocks->blocks[0]));
1534 for (j = old_num_blocks; j < new_num_blocks; j++) {
1535 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1538 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1540 if (old_blocks) {
1541 g_free_rcu(old_blocks, rcu);
1546 static void ram_block_add(RAMBlock *new_block, Error **errp)
1548 RAMBlock *block;
1549 RAMBlock *last_block = NULL;
1550 ram_addr_t old_ram_size, new_ram_size;
1551 Error *err = NULL;
1553 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1555 qemu_mutex_lock_ramlist();
1556 new_block->offset = find_ram_offset(new_block->max_length);
1558 if (!new_block->host) {
1559 if (xen_enabled()) {
1560 xen_ram_alloc(new_block->offset, new_block->max_length,
1561 new_block->mr, &err);
1562 if (err) {
1563 error_propagate(errp, err);
1564 qemu_mutex_unlock_ramlist();
1565 return;
1567 } else {
1568 new_block->host = phys_mem_alloc(new_block->max_length,
1569 &new_block->mr->align);
1570 if (!new_block->host) {
1571 error_setg_errno(errp, errno,
1572 "cannot set up guest memory '%s'",
1573 memory_region_name(new_block->mr));
1574 qemu_mutex_unlock_ramlist();
1575 return;
1577 memory_try_enable_merging(new_block->host, new_block->max_length);
1581 new_ram_size = MAX(old_ram_size,
1582 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1583 if (new_ram_size > old_ram_size) {
1584 migration_bitmap_extend(old_ram_size, new_ram_size);
1585 dirty_memory_extend(old_ram_size, new_ram_size);
1587 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1588 * QLIST (which has an RCU-friendly variant) does not have insertion at
1589 * tail, so save the last element in last_block.
1591 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1592 last_block = block;
1593 if (block->max_length < new_block->max_length) {
1594 break;
1597 if (block) {
1598 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1599 } else if (last_block) {
1600 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1601 } else { /* list is empty */
1602 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1604 ram_list.mru_block = NULL;
1606 /* Write list before version */
1607 smp_wmb();
1608 ram_list.version++;
1609 qemu_mutex_unlock_ramlist();
1611 cpu_physical_memory_set_dirty_range(new_block->offset,
1612 new_block->used_length,
1613 DIRTY_CLIENTS_ALL);
1615 if (new_block->host) {
1616 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1617 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1618 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1619 if (kvm_enabled()) {
1620 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1625 #ifdef __linux__
1626 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1627 bool share, const char *mem_path,
1628 Error **errp)
1630 RAMBlock *new_block;
1631 Error *local_err = NULL;
1633 if (xen_enabled()) {
1634 error_setg(errp, "-mem-path not supported with Xen");
1635 return NULL;
1638 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1640 * file_ram_alloc() needs to allocate just like
1641 * phys_mem_alloc, but we haven't bothered to provide
1642 * a hook there.
1644 error_setg(errp,
1645 "-mem-path not supported with this accelerator");
1646 return NULL;
1649 size = HOST_PAGE_ALIGN(size);
1650 new_block = g_malloc0(sizeof(*new_block));
1651 new_block->mr = mr;
1652 new_block->used_length = size;
1653 new_block->max_length = size;
1654 new_block->flags = share ? RAM_SHARED : 0;
1655 new_block->host = file_ram_alloc(new_block, size,
1656 mem_path, errp);
1657 if (!new_block->host) {
1658 g_free(new_block);
1659 return NULL;
1662 ram_block_add(new_block, &local_err);
1663 if (local_err) {
1664 g_free(new_block);
1665 error_propagate(errp, local_err);
1666 return NULL;
1668 return new_block;
1670 #endif
1672 static
1673 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1674 void (*resized)(const char*,
1675 uint64_t length,
1676 void *host),
1677 void *host, bool resizeable,
1678 MemoryRegion *mr, Error **errp)
1680 RAMBlock *new_block;
1681 Error *local_err = NULL;
1683 size = HOST_PAGE_ALIGN(size);
1684 max_size = HOST_PAGE_ALIGN(max_size);
1685 new_block = g_malloc0(sizeof(*new_block));
1686 new_block->mr = mr;
1687 new_block->resized = resized;
1688 new_block->used_length = size;
1689 new_block->max_length = max_size;
1690 assert(max_size >= size);
1691 new_block->fd = -1;
1692 new_block->host = host;
1693 if (host) {
1694 new_block->flags |= RAM_PREALLOC;
1696 if (resizeable) {
1697 new_block->flags |= RAM_RESIZEABLE;
1699 ram_block_add(new_block, &local_err);
1700 if (local_err) {
1701 g_free(new_block);
1702 error_propagate(errp, local_err);
1703 return NULL;
1705 return new_block;
1708 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1709 MemoryRegion *mr, Error **errp)
1711 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1714 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1716 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1719 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1720 void (*resized)(const char*,
1721 uint64_t length,
1722 void *host),
1723 MemoryRegion *mr, Error **errp)
1725 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1728 static void reclaim_ramblock(RAMBlock *block)
1730 if (block->flags & RAM_PREALLOC) {
1732 } else if (xen_enabled()) {
1733 xen_invalidate_map_cache_entry(block->host);
1734 #ifndef _WIN32
1735 } else if (block->fd >= 0) {
1736 qemu_ram_munmap(block->host, block->max_length);
1737 close(block->fd);
1738 #endif
1739 } else {
1740 qemu_anon_ram_free(block->host, block->max_length);
1742 g_free(block);
1745 void qemu_ram_free(RAMBlock *block)
1747 if (!block) {
1748 return;
1751 qemu_mutex_lock_ramlist();
1752 QLIST_REMOVE_RCU(block, next);
1753 ram_list.mru_block = NULL;
1754 /* Write list before version */
1755 smp_wmb();
1756 ram_list.version++;
1757 call_rcu(block, reclaim_ramblock, rcu);
1758 qemu_mutex_unlock_ramlist();
1761 #ifndef _WIN32
1762 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1764 RAMBlock *block;
1765 ram_addr_t offset;
1766 int flags;
1767 void *area, *vaddr;
1769 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1770 offset = addr - block->offset;
1771 if (offset < block->max_length) {
1772 vaddr = ramblock_ptr(block, offset);
1773 if (block->flags & RAM_PREALLOC) {
1775 } else if (xen_enabled()) {
1776 abort();
1777 } else {
1778 flags = MAP_FIXED;
1779 if (block->fd >= 0) {
1780 flags |= (block->flags & RAM_SHARED ?
1781 MAP_SHARED : MAP_PRIVATE);
1782 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1783 flags, block->fd, offset);
1784 } else {
1786 * Remap needs to match alloc. Accelerators that
1787 * set phys_mem_alloc never remap. If they did,
1788 * we'd need a remap hook here.
1790 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1792 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1793 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1794 flags, -1, 0);
1796 if (area != vaddr) {
1797 fprintf(stderr, "Could not remap addr: "
1798 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1799 length, addr);
1800 exit(1);
1802 memory_try_enable_merging(vaddr, length);
1803 qemu_ram_setup_dump(vaddr, length);
1808 #endif /* !_WIN32 */
1810 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1811 * This should not be used for general purpose DMA. Use address_space_map
1812 * or address_space_rw instead. For local memory (e.g. video ram) that the
1813 * device owns, use memory_region_get_ram_ptr.
1815 * Called within RCU critical section.
1817 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1819 RAMBlock *block = ram_block;
1821 if (block == NULL) {
1822 block = qemu_get_ram_block(addr);
1823 addr -= block->offset;
1826 if (xen_enabled() && block->host == NULL) {
1827 /* We need to check if the requested address is in the RAM
1828 * because we don't want to map the entire memory in QEMU.
1829 * In that case just map until the end of the page.
1831 if (block->offset == 0) {
1832 return xen_map_cache(addr, 0, 0);
1835 block->host = xen_map_cache(block->offset, block->max_length, 1);
1837 return ramblock_ptr(block, addr);
1840 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1841 * but takes a size argument.
1843 * Called within RCU critical section.
1845 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1846 hwaddr *size)
1848 RAMBlock *block = ram_block;
1849 if (*size == 0) {
1850 return NULL;
1853 if (block == NULL) {
1854 block = qemu_get_ram_block(addr);
1855 addr -= block->offset;
1857 *size = MIN(*size, block->max_length - addr);
1859 if (xen_enabled() && block->host == NULL) {
1860 /* We need to check if the requested address is in the RAM
1861 * because we don't want to map the entire memory in QEMU.
1862 * In that case just map the requested area.
1864 if (block->offset == 0) {
1865 return xen_map_cache(addr, *size, 1);
1868 block->host = xen_map_cache(block->offset, block->max_length, 1);
1871 return ramblock_ptr(block, addr);
1875 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1876 * in that RAMBlock.
1878 * ptr: Host pointer to look up
1879 * round_offset: If true round the result offset down to a page boundary
1880 * *ram_addr: set to result ram_addr
1881 * *offset: set to result offset within the RAMBlock
1883 * Returns: RAMBlock (or NULL if not found)
1885 * By the time this function returns, the returned pointer is not protected
1886 * by RCU anymore. If the caller is not within an RCU critical section and
1887 * does not hold the iothread lock, it must have other means of protecting the
1888 * pointer, such as a reference to the region that includes the incoming
1889 * ram_addr_t.
1891 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1892 ram_addr_t *offset)
1894 RAMBlock *block;
1895 uint8_t *host = ptr;
1897 if (xen_enabled()) {
1898 ram_addr_t ram_addr;
1899 rcu_read_lock();
1900 ram_addr = xen_ram_addr_from_mapcache(ptr);
1901 block = qemu_get_ram_block(ram_addr);
1902 if (block) {
1903 *offset = ram_addr - block->offset;
1905 rcu_read_unlock();
1906 return block;
1909 rcu_read_lock();
1910 block = atomic_rcu_read(&ram_list.mru_block);
1911 if (block && block->host && host - block->host < block->max_length) {
1912 goto found;
1915 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1916 /* This case append when the block is not mapped. */
1917 if (block->host == NULL) {
1918 continue;
1920 if (host - block->host < block->max_length) {
1921 goto found;
1925 rcu_read_unlock();
1926 return NULL;
1928 found:
1929 *offset = (host - block->host);
1930 if (round_offset) {
1931 *offset &= TARGET_PAGE_MASK;
1933 rcu_read_unlock();
1934 return block;
1938 * Finds the named RAMBlock
1940 * name: The name of RAMBlock to find
1942 * Returns: RAMBlock (or NULL if not found)
1944 RAMBlock *qemu_ram_block_by_name(const char *name)
1946 RAMBlock *block;
1948 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1949 if (!strcmp(name, block->idstr)) {
1950 return block;
1954 return NULL;
1957 /* Some of the softmmu routines need to translate from a host pointer
1958 (typically a TLB entry) back to a ram offset. */
1959 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1961 RAMBlock *block;
1962 ram_addr_t offset;
1964 block = qemu_ram_block_from_host(ptr, false, &offset);
1965 if (!block) {
1966 return RAM_ADDR_INVALID;
1969 return block->offset + offset;
1972 /* Called within RCU critical section. */
1973 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1974 uint64_t val, unsigned size)
1976 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1977 tb_invalidate_phys_page_fast(ram_addr, size);
1979 switch (size) {
1980 case 1:
1981 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1982 break;
1983 case 2:
1984 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1985 break;
1986 case 4:
1987 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1988 break;
1989 default:
1990 abort();
1992 /* Set both VGA and migration bits for simplicity and to remove
1993 * the notdirty callback faster.
1995 cpu_physical_memory_set_dirty_range(ram_addr, size,
1996 DIRTY_CLIENTS_NOCODE);
1997 /* we remove the notdirty callback only if the code has been
1998 flushed */
1999 if (!cpu_physical_memory_is_clean(ram_addr)) {
2000 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2004 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2005 unsigned size, bool is_write)
2007 return is_write;
2010 static const MemoryRegionOps notdirty_mem_ops = {
2011 .write = notdirty_mem_write,
2012 .valid.accepts = notdirty_mem_accepts,
2013 .endianness = DEVICE_NATIVE_ENDIAN,
2016 /* Generate a debug exception if a watchpoint has been hit. */
2017 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2019 CPUState *cpu = current_cpu;
2020 CPUClass *cc = CPU_GET_CLASS(cpu);
2021 CPUArchState *env = cpu->env_ptr;
2022 target_ulong pc, cs_base;
2023 target_ulong vaddr;
2024 CPUWatchpoint *wp;
2025 uint32_t cpu_flags;
2027 if (cpu->watchpoint_hit) {
2028 /* We re-entered the check after replacing the TB. Now raise
2029 * the debug interrupt so that is will trigger after the
2030 * current instruction. */
2031 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2032 return;
2034 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2035 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2036 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2037 && (wp->flags & flags)) {
2038 if (flags == BP_MEM_READ) {
2039 wp->flags |= BP_WATCHPOINT_HIT_READ;
2040 } else {
2041 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2043 wp->hitaddr = vaddr;
2044 wp->hitattrs = attrs;
2045 if (!cpu->watchpoint_hit) {
2046 if (wp->flags & BP_CPU &&
2047 !cc->debug_check_watchpoint(cpu, wp)) {
2048 wp->flags &= ~BP_WATCHPOINT_HIT;
2049 continue;
2051 cpu->watchpoint_hit = wp;
2052 tb_check_watchpoint(cpu);
2053 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2054 cpu->exception_index = EXCP_DEBUG;
2055 cpu_loop_exit(cpu);
2056 } else {
2057 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2058 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2059 cpu_loop_exit_noexc(cpu);
2062 } else {
2063 wp->flags &= ~BP_WATCHPOINT_HIT;
2068 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2069 so these check for a hit then pass through to the normal out-of-line
2070 phys routines. */
2071 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2072 unsigned size, MemTxAttrs attrs)
2074 MemTxResult res;
2075 uint64_t data;
2076 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2077 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2079 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2080 switch (size) {
2081 case 1:
2082 data = address_space_ldub(as, addr, attrs, &res);
2083 break;
2084 case 2:
2085 data = address_space_lduw(as, addr, attrs, &res);
2086 break;
2087 case 4:
2088 data = address_space_ldl(as, addr, attrs, &res);
2089 break;
2090 default: abort();
2092 *pdata = data;
2093 return res;
2096 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2097 uint64_t val, unsigned size,
2098 MemTxAttrs attrs)
2100 MemTxResult res;
2101 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2102 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2104 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2105 switch (size) {
2106 case 1:
2107 address_space_stb(as, addr, val, attrs, &res);
2108 break;
2109 case 2:
2110 address_space_stw(as, addr, val, attrs, &res);
2111 break;
2112 case 4:
2113 address_space_stl(as, addr, val, attrs, &res);
2114 break;
2115 default: abort();
2117 return res;
2120 static const MemoryRegionOps watch_mem_ops = {
2121 .read_with_attrs = watch_mem_read,
2122 .write_with_attrs = watch_mem_write,
2123 .endianness = DEVICE_NATIVE_ENDIAN,
2126 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2127 unsigned len, MemTxAttrs attrs)
2129 subpage_t *subpage = opaque;
2130 uint8_t buf[8];
2131 MemTxResult res;
2133 #if defined(DEBUG_SUBPAGE)
2134 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2135 subpage, len, addr);
2136 #endif
2137 res = address_space_read(subpage->as, addr + subpage->base,
2138 attrs, buf, len);
2139 if (res) {
2140 return res;
2142 switch (len) {
2143 case 1:
2144 *data = ldub_p(buf);
2145 return MEMTX_OK;
2146 case 2:
2147 *data = lduw_p(buf);
2148 return MEMTX_OK;
2149 case 4:
2150 *data = ldl_p(buf);
2151 return MEMTX_OK;
2152 case 8:
2153 *data = ldq_p(buf);
2154 return MEMTX_OK;
2155 default:
2156 abort();
2160 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2161 uint64_t value, unsigned len, MemTxAttrs attrs)
2163 subpage_t *subpage = opaque;
2164 uint8_t buf[8];
2166 #if defined(DEBUG_SUBPAGE)
2167 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2168 " value %"PRIx64"\n",
2169 __func__, subpage, len, addr, value);
2170 #endif
2171 switch (len) {
2172 case 1:
2173 stb_p(buf, value);
2174 break;
2175 case 2:
2176 stw_p(buf, value);
2177 break;
2178 case 4:
2179 stl_p(buf, value);
2180 break;
2181 case 8:
2182 stq_p(buf, value);
2183 break;
2184 default:
2185 abort();
2187 return address_space_write(subpage->as, addr + subpage->base,
2188 attrs, buf, len);
2191 static bool subpage_accepts(void *opaque, hwaddr addr,
2192 unsigned len, bool is_write)
2194 subpage_t *subpage = opaque;
2195 #if defined(DEBUG_SUBPAGE)
2196 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2197 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2198 #endif
2200 return address_space_access_valid(subpage->as, addr + subpage->base,
2201 len, is_write);
2204 static const MemoryRegionOps subpage_ops = {
2205 .read_with_attrs = subpage_read,
2206 .write_with_attrs = subpage_write,
2207 .impl.min_access_size = 1,
2208 .impl.max_access_size = 8,
2209 .valid.min_access_size = 1,
2210 .valid.max_access_size = 8,
2211 .valid.accepts = subpage_accepts,
2212 .endianness = DEVICE_NATIVE_ENDIAN,
2215 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2216 uint16_t section)
2218 int idx, eidx;
2220 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2221 return -1;
2222 idx = SUBPAGE_IDX(start);
2223 eidx = SUBPAGE_IDX(end);
2224 #if defined(DEBUG_SUBPAGE)
2225 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2226 __func__, mmio, start, end, idx, eidx, section);
2227 #endif
2228 for (; idx <= eidx; idx++) {
2229 mmio->sub_section[idx] = section;
2232 return 0;
2235 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2237 subpage_t *mmio;
2239 mmio = g_malloc0(sizeof(subpage_t));
2241 mmio->as = as;
2242 mmio->base = base;
2243 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2244 NULL, TARGET_PAGE_SIZE);
2245 mmio->iomem.subpage = true;
2246 #if defined(DEBUG_SUBPAGE)
2247 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2248 mmio, base, TARGET_PAGE_SIZE);
2249 #endif
2250 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2252 return mmio;
2255 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2256 MemoryRegion *mr)
2258 assert(as);
2259 MemoryRegionSection section = {
2260 .address_space = as,
2261 .mr = mr,
2262 .offset_within_address_space = 0,
2263 .offset_within_region = 0,
2264 .size = int128_2_64(),
2267 return phys_section_add(map, &section);
2270 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2272 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2273 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2274 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2275 MemoryRegionSection *sections = d->map.sections;
2277 return sections[index & ~TARGET_PAGE_MASK].mr;
2280 static void io_mem_init(void)
2282 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2283 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2284 NULL, UINT64_MAX);
2285 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2286 NULL, UINT64_MAX);
2287 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2288 NULL, UINT64_MAX);
2291 static void mem_begin(MemoryListener *listener)
2293 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2294 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2295 uint16_t n;
2297 n = dummy_section(&d->map, as, &io_mem_unassigned);
2298 assert(n == PHYS_SECTION_UNASSIGNED);
2299 n = dummy_section(&d->map, as, &io_mem_notdirty);
2300 assert(n == PHYS_SECTION_NOTDIRTY);
2301 n = dummy_section(&d->map, as, &io_mem_rom);
2302 assert(n == PHYS_SECTION_ROM);
2303 n = dummy_section(&d->map, as, &io_mem_watch);
2304 assert(n == PHYS_SECTION_WATCH);
2306 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2307 d->as = as;
2308 as->next_dispatch = d;
2311 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2313 phys_sections_free(&d->map);
2314 g_free(d);
2317 static void mem_commit(MemoryListener *listener)
2319 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2320 AddressSpaceDispatch *cur = as->dispatch;
2321 AddressSpaceDispatch *next = as->next_dispatch;
2323 phys_page_compact_all(next, next->map.nodes_nb);
2325 atomic_rcu_set(&as->dispatch, next);
2326 if (cur) {
2327 call_rcu(cur, address_space_dispatch_free, rcu);
2331 static void tcg_commit(MemoryListener *listener)
2333 CPUAddressSpace *cpuas;
2334 AddressSpaceDispatch *d;
2336 /* since each CPU stores ram addresses in its TLB cache, we must
2337 reset the modified entries */
2338 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2339 cpu_reloading_memory_map();
2340 /* The CPU and TLB are protected by the iothread lock.
2341 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2342 * may have split the RCU critical section.
2344 d = atomic_rcu_read(&cpuas->as->dispatch);
2345 cpuas->memory_dispatch = d;
2346 tlb_flush(cpuas->cpu, 1);
2349 void address_space_init_dispatch(AddressSpace *as)
2351 as->dispatch = NULL;
2352 as->dispatch_listener = (MemoryListener) {
2353 .begin = mem_begin,
2354 .commit = mem_commit,
2355 .region_add = mem_add,
2356 .region_nop = mem_add,
2357 .priority = 0,
2359 memory_listener_register(&as->dispatch_listener, as);
2362 void address_space_unregister(AddressSpace *as)
2364 memory_listener_unregister(&as->dispatch_listener);
2367 void address_space_destroy_dispatch(AddressSpace *as)
2369 AddressSpaceDispatch *d = as->dispatch;
2371 atomic_rcu_set(&as->dispatch, NULL);
2372 if (d) {
2373 call_rcu(d, address_space_dispatch_free, rcu);
2377 static void memory_map_init(void)
2379 system_memory = g_malloc(sizeof(*system_memory));
2381 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2382 address_space_init(&address_space_memory, system_memory, "memory");
2384 system_io = g_malloc(sizeof(*system_io));
2385 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2386 65536);
2387 address_space_init(&address_space_io, system_io, "I/O");
2390 MemoryRegion *get_system_memory(void)
2392 return system_memory;
2395 MemoryRegion *get_system_io(void)
2397 return system_io;
2400 #endif /* !defined(CONFIG_USER_ONLY) */
2402 /* physical memory access (slow version, mainly for debug) */
2403 #if defined(CONFIG_USER_ONLY)
2404 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2405 uint8_t *buf, int len, int is_write)
2407 int l, flags;
2408 target_ulong page;
2409 void * p;
2411 while (len > 0) {
2412 page = addr & TARGET_PAGE_MASK;
2413 l = (page + TARGET_PAGE_SIZE) - addr;
2414 if (l > len)
2415 l = len;
2416 flags = page_get_flags(page);
2417 if (!(flags & PAGE_VALID))
2418 return -1;
2419 if (is_write) {
2420 if (!(flags & PAGE_WRITE))
2421 return -1;
2422 /* XXX: this code should not depend on lock_user */
2423 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2424 return -1;
2425 memcpy(p, buf, l);
2426 unlock_user(p, addr, l);
2427 } else {
2428 if (!(flags & PAGE_READ))
2429 return -1;
2430 /* XXX: this code should not depend on lock_user */
2431 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2432 return -1;
2433 memcpy(buf, p, l);
2434 unlock_user(p, addr, 0);
2436 len -= l;
2437 buf += l;
2438 addr += l;
2440 return 0;
2443 #else
2445 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2446 hwaddr length)
2448 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2449 addr += memory_region_get_ram_addr(mr);
2451 /* No early return if dirty_log_mask is or becomes 0, because
2452 * cpu_physical_memory_set_dirty_range will still call
2453 * xen_modified_memory.
2455 if (dirty_log_mask) {
2456 dirty_log_mask =
2457 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2459 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2460 tb_invalidate_phys_range(addr, addr + length);
2461 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2463 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2466 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2468 unsigned access_size_max = mr->ops->valid.max_access_size;
2470 /* Regions are assumed to support 1-4 byte accesses unless
2471 otherwise specified. */
2472 if (access_size_max == 0) {
2473 access_size_max = 4;
2476 /* Bound the maximum access by the alignment of the address. */
2477 if (!mr->ops->impl.unaligned) {
2478 unsigned align_size_max = addr & -addr;
2479 if (align_size_max != 0 && align_size_max < access_size_max) {
2480 access_size_max = align_size_max;
2484 /* Don't attempt accesses larger than the maximum. */
2485 if (l > access_size_max) {
2486 l = access_size_max;
2488 l = pow2floor(l);
2490 return l;
2493 static bool prepare_mmio_access(MemoryRegion *mr)
2495 bool unlocked = !qemu_mutex_iothread_locked();
2496 bool release_lock = false;
2498 if (unlocked && mr->global_locking) {
2499 qemu_mutex_lock_iothread();
2500 unlocked = false;
2501 release_lock = true;
2503 if (mr->flush_coalesced_mmio) {
2504 if (unlocked) {
2505 qemu_mutex_lock_iothread();
2507 qemu_flush_coalesced_mmio_buffer();
2508 if (unlocked) {
2509 qemu_mutex_unlock_iothread();
2513 return release_lock;
2516 /* Called within RCU critical section. */
2517 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2518 MemTxAttrs attrs,
2519 const uint8_t *buf,
2520 int len, hwaddr addr1,
2521 hwaddr l, MemoryRegion *mr)
2523 uint8_t *ptr;
2524 uint64_t val;
2525 MemTxResult result = MEMTX_OK;
2526 bool release_lock = false;
2528 for (;;) {
2529 if (!memory_access_is_direct(mr, true)) {
2530 release_lock |= prepare_mmio_access(mr);
2531 l = memory_access_size(mr, l, addr1);
2532 /* XXX: could force current_cpu to NULL to avoid
2533 potential bugs */
2534 switch (l) {
2535 case 8:
2536 /* 64 bit write access */
2537 val = ldq_p(buf);
2538 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2539 attrs);
2540 break;
2541 case 4:
2542 /* 32 bit write access */
2543 val = ldl_p(buf);
2544 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2545 attrs);
2546 break;
2547 case 2:
2548 /* 16 bit write access */
2549 val = lduw_p(buf);
2550 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2551 attrs);
2552 break;
2553 case 1:
2554 /* 8 bit write access */
2555 val = ldub_p(buf);
2556 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2557 attrs);
2558 break;
2559 default:
2560 abort();
2562 } else {
2563 /* RAM case */
2564 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2565 memcpy(ptr, buf, l);
2566 invalidate_and_set_dirty(mr, addr1, l);
2569 if (release_lock) {
2570 qemu_mutex_unlock_iothread();
2571 release_lock = false;
2574 len -= l;
2575 buf += l;
2576 addr += l;
2578 if (!len) {
2579 break;
2582 l = len;
2583 mr = address_space_translate(as, addr, &addr1, &l, true);
2586 return result;
2589 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2590 const uint8_t *buf, int len)
2592 hwaddr l;
2593 hwaddr addr1;
2594 MemoryRegion *mr;
2595 MemTxResult result = MEMTX_OK;
2597 if (len > 0) {
2598 rcu_read_lock();
2599 l = len;
2600 mr = address_space_translate(as, addr, &addr1, &l, true);
2601 result = address_space_write_continue(as, addr, attrs, buf, len,
2602 addr1, l, mr);
2603 rcu_read_unlock();
2606 return result;
2609 /* Called within RCU critical section. */
2610 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2611 MemTxAttrs attrs, uint8_t *buf,
2612 int len, hwaddr addr1, hwaddr l,
2613 MemoryRegion *mr)
2615 uint8_t *ptr;
2616 uint64_t val;
2617 MemTxResult result = MEMTX_OK;
2618 bool release_lock = false;
2620 for (;;) {
2621 if (!memory_access_is_direct(mr, false)) {
2622 /* I/O case */
2623 release_lock |= prepare_mmio_access(mr);
2624 l = memory_access_size(mr, l, addr1);
2625 switch (l) {
2626 case 8:
2627 /* 64 bit read access */
2628 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2629 attrs);
2630 stq_p(buf, val);
2631 break;
2632 case 4:
2633 /* 32 bit read access */
2634 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2635 attrs);
2636 stl_p(buf, val);
2637 break;
2638 case 2:
2639 /* 16 bit read access */
2640 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2641 attrs);
2642 stw_p(buf, val);
2643 break;
2644 case 1:
2645 /* 8 bit read access */
2646 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2647 attrs);
2648 stb_p(buf, val);
2649 break;
2650 default:
2651 abort();
2653 } else {
2654 /* RAM case */
2655 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2656 memcpy(buf, ptr, l);
2659 if (release_lock) {
2660 qemu_mutex_unlock_iothread();
2661 release_lock = false;
2664 len -= l;
2665 buf += l;
2666 addr += l;
2668 if (!len) {
2669 break;
2672 l = len;
2673 mr = address_space_translate(as, addr, &addr1, &l, false);
2676 return result;
2679 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2680 MemTxAttrs attrs, uint8_t *buf, int len)
2682 hwaddr l;
2683 hwaddr addr1;
2684 MemoryRegion *mr;
2685 MemTxResult result = MEMTX_OK;
2687 if (len > 0) {
2688 rcu_read_lock();
2689 l = len;
2690 mr = address_space_translate(as, addr, &addr1, &l, false);
2691 result = address_space_read_continue(as, addr, attrs, buf, len,
2692 addr1, l, mr);
2693 rcu_read_unlock();
2696 return result;
2699 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2700 uint8_t *buf, int len, bool is_write)
2702 if (is_write) {
2703 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2704 } else {
2705 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2709 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2710 int len, int is_write)
2712 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2713 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into the mapped memory */
    FLUSH_CACHE,    /* call flush_icache_range() on the mapped memory */
};
2721 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2722 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2724 hwaddr l;
2725 uint8_t *ptr;
2726 hwaddr addr1;
2727 MemoryRegion *mr;
2729 rcu_read_lock();
2730 while (len > 0) {
2731 l = len;
2732 mr = address_space_translate(as, addr, &addr1, &l, true);
2734 if (!(memory_region_is_ram(mr) ||
2735 memory_region_is_romd(mr))) {
2736 l = memory_access_size(mr, l, addr1);
2737 } else {
2738 /* ROM/RAM case */
2739 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2740 switch (type) {
2741 case WRITE_DATA:
2742 memcpy(ptr, buf, l);
2743 invalidate_and_set_dirty(mr, addr1, l);
2744 break;
2745 case FLUSH_CACHE:
2746 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2747 break;
2750 len -= l;
2751 buf += l;
2752 addr += l;
2754 rcu_read_unlock();
2757 /* used for ROM loading : can write in RAM and ROM */
2758 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2759 const uint8_t *buf, int len)
2761 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2764 void cpu_flush_icache_range(hwaddr start, int len)
2767 * This function should do the same thing as an icache flush that was
2768 * triggered from within the guest. For TCG we are always cache coherent,
2769 * so there is no need to flush anything. For KVM / Xen we need to flush
2770 * the host's instruction cache at least.
2772 if (tcg_enabled()) {
2773 return;
2776 cpu_physical_memory_write_rom_internal(&address_space_memory,
2777 start, NULL, len, FLUSH_CACHE);
2780 typedef struct {
2781 MemoryRegion *mr;
2782 void *buffer;
2783 hwaddr addr;
2784 hwaddr len;
2785 bool in_use;
2786 } BounceBuffer;
2788 static BounceBuffer bounce;
2790 typedef struct MapClient {
2791 QEMUBH *bh;
2792 QLIST_ENTRY(MapClient) link;
2793 } MapClient;
2795 QemuMutex map_client_list_lock;
2796 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2797 = QLIST_HEAD_INITIALIZER(map_client_list);
2799 static void cpu_unregister_map_client_do(MapClient *client)
2801 QLIST_REMOVE(client, link);
2802 g_free(client);
2805 static void cpu_notify_map_clients_locked(void)
2807 MapClient *client;
2809 while (!QLIST_EMPTY(&map_client_list)) {
2810 client = QLIST_FIRST(&map_client_list);
2811 qemu_bh_schedule(client->bh);
2812 cpu_unregister_map_client_do(client);
2816 void cpu_register_map_client(QEMUBH *bh)
2818 MapClient *client = g_malloc(sizeof(*client));
2820 qemu_mutex_lock(&map_client_list_lock);
2821 client->bh = bh;
2822 QLIST_INSERT_HEAD(&map_client_list, client, link);
2823 if (!atomic_read(&bounce.in_use)) {
2824 cpu_notify_map_clients_locked();
2826 qemu_mutex_unlock(&map_client_list_lock);
2829 void cpu_exec_init_all(void)
2831 qemu_mutex_init(&ram_list.mutex);
2832 io_mem_init();
2833 memory_map_init();
2834 qemu_mutex_init(&map_client_list_lock);
2837 void cpu_unregister_map_client(QEMUBH *bh)
2839 MapClient *client;
2841 qemu_mutex_lock(&map_client_list_lock);
2842 QLIST_FOREACH(client, &map_client_list, link) {
2843 if (client->bh == bh) {
2844 cpu_unregister_map_client_do(client);
2845 break;
2848 qemu_mutex_unlock(&map_client_list_lock);
2851 static void cpu_notify_map_clients(void)
2853 qemu_mutex_lock(&map_client_list_lock);
2854 cpu_notify_map_clients_locked();
2855 qemu_mutex_unlock(&map_client_list_lock);
2858 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2860 MemoryRegion *mr;
2861 hwaddr l, xlat;
2863 rcu_read_lock();
2864 while (len > 0) {
2865 l = len;
2866 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2867 if (!memory_access_is_direct(mr, is_write)) {
2868 l = memory_access_size(mr, l, addr);
2869 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2870 return false;
2874 len -= l;
2875 addr += l;
2877 rcu_read_unlock();
2878 return true;
2881 /* Map a physical memory region into a host virtual address.
2882 * May map a subset of the requested range, given by and returned in *plen.
2883 * May return NULL if resources needed to perform the mapping are exhausted.
2884 * Use only for reads OR writes - not for read-modify-write operations.
2885 * Use cpu_register_map_client() to know when retrying the map operation is
2886 * likely to succeed.
2888 void *address_space_map(AddressSpace *as,
2889 hwaddr addr,
2890 hwaddr *plen,
2891 bool is_write)
2893 hwaddr len = *plen;
2894 hwaddr done = 0;
2895 hwaddr l, xlat, base;
2896 MemoryRegion *mr, *this_mr;
2897 void *ptr;
2899 if (len == 0) {
2900 return NULL;
2903 l = len;
2904 rcu_read_lock();
2905 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2907 if (!memory_access_is_direct(mr, is_write)) {
2908 if (atomic_xchg(&bounce.in_use, true)) {
2909 rcu_read_unlock();
2910 return NULL;
2912 /* Avoid unbounded allocations */
2913 l = MIN(l, TARGET_PAGE_SIZE);
2914 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2915 bounce.addr = addr;
2916 bounce.len = l;
2918 memory_region_ref(mr);
2919 bounce.mr = mr;
2920 if (!is_write) {
2921 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2922 bounce.buffer, l);
2925 rcu_read_unlock();
2926 *plen = l;
2927 return bounce.buffer;
2930 base = xlat;
2932 for (;;) {
2933 len -= l;
2934 addr += l;
2935 done += l;
2936 if (len == 0) {
2937 break;
2940 l = len;
2941 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2942 if (this_mr != mr || xlat != base + done) {
2943 break;
2947 memory_region_ref(mr);
2948 *plen = done;
2949 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2950 rcu_read_unlock();
2952 return ptr;
2955 /* Unmaps a memory region previously mapped by address_space_map().
2956 * Will also mark the memory as dirty if is_write == 1. access_len gives
2957 * the amount of memory that was actually read or written by the caller.
2959 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2960 int is_write, hwaddr access_len)
2962 if (buffer != bounce.buffer) {
2963 MemoryRegion *mr;
2964 ram_addr_t addr1;
2966 mr = memory_region_from_host(buffer, &addr1);
2967 assert(mr != NULL);
2968 if (is_write) {
2969 invalidate_and_set_dirty(mr, addr1, access_len);
2971 if (xen_enabled()) {
2972 xen_invalidate_map_cache_entry(buffer);
2974 memory_region_unref(mr);
2975 return;
2977 if (is_write) {
2978 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2979 bounce.buffer, access_len);
2981 qemu_vfree(bounce.buffer);
2982 bounce.buffer = NULL;
2983 memory_region_unref(bounce.mr);
2984 atomic_mb_set(&bounce.in_use, false);
2985 cpu_notify_map_clients();
2988 void *cpu_physical_memory_map(hwaddr addr,
2989 hwaddr *plen,
2990 int is_write)
2992 return address_space_map(&address_space_memory, addr, plen, is_write);
2995 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2996 int is_write, hwaddr access_len)
2998 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3001 /* warning: addr must be aligned */
3002 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3003 MemTxAttrs attrs,
3004 MemTxResult *result,
3005 enum device_endian endian)
3007 uint8_t *ptr;
3008 uint64_t val;
3009 MemoryRegion *mr;
3010 hwaddr l = 4;
3011 hwaddr addr1;
3012 MemTxResult r;
3013 bool release_lock = false;
3015 rcu_read_lock();
3016 mr = address_space_translate(as, addr, &addr1, &l, false);
3017 if (l < 4 || !memory_access_is_direct(mr, false)) {
3018 release_lock |= prepare_mmio_access(mr);
3020 /* I/O case */
3021 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3022 #if defined(TARGET_WORDS_BIGENDIAN)
3023 if (endian == DEVICE_LITTLE_ENDIAN) {
3024 val = bswap32(val);
3026 #else
3027 if (endian == DEVICE_BIG_ENDIAN) {
3028 val = bswap32(val);
3030 #endif
3031 } else {
3032 /* RAM case */
3033 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3034 switch (endian) {
3035 case DEVICE_LITTLE_ENDIAN:
3036 val = ldl_le_p(ptr);
3037 break;
3038 case DEVICE_BIG_ENDIAN:
3039 val = ldl_be_p(ptr);
3040 break;
3041 default:
3042 val = ldl_p(ptr);
3043 break;
3045 r = MEMTX_OK;
3047 if (result) {
3048 *result = r;
3050 if (release_lock) {
3051 qemu_mutex_unlock_iothread();
3053 rcu_read_unlock();
3054 return val;
3057 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3058 MemTxAttrs attrs, MemTxResult *result)
3060 return address_space_ldl_internal(as, addr, attrs, result,
3061 DEVICE_NATIVE_ENDIAN);
3064 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3065 MemTxAttrs attrs, MemTxResult *result)
3067 return address_space_ldl_internal(as, addr, attrs, result,
3068 DEVICE_LITTLE_ENDIAN);
3071 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3072 MemTxAttrs attrs, MemTxResult *result)
3074 return address_space_ldl_internal(as, addr, attrs, result,
3075 DEVICE_BIG_ENDIAN);
3078 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3080 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3083 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3085 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3088 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3090 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3093 /* warning: addr must be aligned */
3094 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3095 MemTxAttrs attrs,
3096 MemTxResult *result,
3097 enum device_endian endian)
3099 uint8_t *ptr;
3100 uint64_t val;
3101 MemoryRegion *mr;
3102 hwaddr l = 8;
3103 hwaddr addr1;
3104 MemTxResult r;
3105 bool release_lock = false;
3107 rcu_read_lock();
3108 mr = address_space_translate(as, addr, &addr1, &l,
3109 false);
3110 if (l < 8 || !memory_access_is_direct(mr, false)) {
3111 release_lock |= prepare_mmio_access(mr);
3113 /* I/O case */
3114 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3115 #if defined(TARGET_WORDS_BIGENDIAN)
3116 if (endian == DEVICE_LITTLE_ENDIAN) {
3117 val = bswap64(val);
3119 #else
3120 if (endian == DEVICE_BIG_ENDIAN) {
3121 val = bswap64(val);
3123 #endif
3124 } else {
3125 /* RAM case */
3126 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3127 switch (endian) {
3128 case DEVICE_LITTLE_ENDIAN:
3129 val = ldq_le_p(ptr);
3130 break;
3131 case DEVICE_BIG_ENDIAN:
3132 val = ldq_be_p(ptr);
3133 break;
3134 default:
3135 val = ldq_p(ptr);
3136 break;
3138 r = MEMTX_OK;
3140 if (result) {
3141 *result = r;
3143 if (release_lock) {
3144 qemu_mutex_unlock_iothread();
3146 rcu_read_unlock();
3147 return val;
3150 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3151 MemTxAttrs attrs, MemTxResult *result)
3153 return address_space_ldq_internal(as, addr, attrs, result,
3154 DEVICE_NATIVE_ENDIAN);
3157 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3158 MemTxAttrs attrs, MemTxResult *result)
3160 return address_space_ldq_internal(as, addr, attrs, result,
3161 DEVICE_LITTLE_ENDIAN);
3164 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3165 MemTxAttrs attrs, MemTxResult *result)
3167 return address_space_ldq_internal(as, addr, attrs, result,
3168 DEVICE_BIG_ENDIAN);
3171 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3173 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3176 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3178 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3181 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3183 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3186 /* XXX: optimize */
3187 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3188 MemTxAttrs attrs, MemTxResult *result)
3190 uint8_t val;
3191 MemTxResult r;
3193 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3194 if (result) {
3195 *result = r;
3197 return val;
3200 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3202 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3205 /* warning: addr must be aligned */
3206 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3207 hwaddr addr,
3208 MemTxAttrs attrs,
3209 MemTxResult *result,
3210 enum device_endian endian)
3212 uint8_t *ptr;
3213 uint64_t val;
3214 MemoryRegion *mr;
3215 hwaddr l = 2;
3216 hwaddr addr1;
3217 MemTxResult r;
3218 bool release_lock = false;
3220 rcu_read_lock();
3221 mr = address_space_translate(as, addr, &addr1, &l,
3222 false);
3223 if (l < 2 || !memory_access_is_direct(mr, false)) {
3224 release_lock |= prepare_mmio_access(mr);
3226 /* I/O case */
3227 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3228 #if defined(TARGET_WORDS_BIGENDIAN)
3229 if (endian == DEVICE_LITTLE_ENDIAN) {
3230 val = bswap16(val);
3232 #else
3233 if (endian == DEVICE_BIG_ENDIAN) {
3234 val = bswap16(val);
3236 #endif
3237 } else {
3238 /* RAM case */
3239 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3240 switch (endian) {
3241 case DEVICE_LITTLE_ENDIAN:
3242 val = lduw_le_p(ptr);
3243 break;
3244 case DEVICE_BIG_ENDIAN:
3245 val = lduw_be_p(ptr);
3246 break;
3247 default:
3248 val = lduw_p(ptr);
3249 break;
3251 r = MEMTX_OK;
3253 if (result) {
3254 *result = r;
3256 if (release_lock) {
3257 qemu_mutex_unlock_iothread();
3259 rcu_read_unlock();
3260 return val;
3263 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3264 MemTxAttrs attrs, MemTxResult *result)
3266 return address_space_lduw_internal(as, addr, attrs, result,
3267 DEVICE_NATIVE_ENDIAN);
3270 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3271 MemTxAttrs attrs, MemTxResult *result)
3273 return address_space_lduw_internal(as, addr, attrs, result,
3274 DEVICE_LITTLE_ENDIAN);
3277 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3278 MemTxAttrs attrs, MemTxResult *result)
3280 return address_space_lduw_internal(as, addr, attrs, result,
3281 DEVICE_BIG_ENDIAN);
3284 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3286 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3289 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3291 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3294 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3296 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3299 /* warning: addr must be aligned. The ram page is not masked as dirty
3300 and the code inside is not invalidated. It is useful if the dirty
3301 bits are used to track modified PTEs */
3302 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3303 MemTxAttrs attrs, MemTxResult *result)
3305 uint8_t *ptr;
3306 MemoryRegion *mr;
3307 hwaddr l = 4;
3308 hwaddr addr1;
3309 MemTxResult r;
3310 uint8_t dirty_log_mask;
3311 bool release_lock = false;
3313 rcu_read_lock();
3314 mr = address_space_translate(as, addr, &addr1, &l,
3315 true);
3316 if (l < 4 || !memory_access_is_direct(mr, true)) {
3317 release_lock |= prepare_mmio_access(mr);
3319 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3320 } else {
3321 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3322 stl_p(ptr, val);
3324 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3325 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3326 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3327 4, dirty_log_mask);
3328 r = MEMTX_OK;
3330 if (result) {
3331 *result = r;
3333 if (release_lock) {
3334 qemu_mutex_unlock_iothread();
3336 rcu_read_unlock();
3339 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3341 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3344 /* warning: addr must be aligned */
3345 static inline void address_space_stl_internal(AddressSpace *as,
3346 hwaddr addr, uint32_t val,
3347 MemTxAttrs attrs,
3348 MemTxResult *result,
3349 enum device_endian endian)
3351 uint8_t *ptr;
3352 MemoryRegion *mr;
3353 hwaddr l = 4;
3354 hwaddr addr1;
3355 MemTxResult r;
3356 bool release_lock = false;
3358 rcu_read_lock();
3359 mr = address_space_translate(as, addr, &addr1, &l,
3360 true);
3361 if (l < 4 || !memory_access_is_direct(mr, true)) {
3362 release_lock |= prepare_mmio_access(mr);
3364 #if defined(TARGET_WORDS_BIGENDIAN)
3365 if (endian == DEVICE_LITTLE_ENDIAN) {
3366 val = bswap32(val);
3368 #else
3369 if (endian == DEVICE_BIG_ENDIAN) {
3370 val = bswap32(val);
3372 #endif
3373 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3374 } else {
3375 /* RAM case */
3376 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3377 switch (endian) {
3378 case DEVICE_LITTLE_ENDIAN:
3379 stl_le_p(ptr, val);
3380 break;
3381 case DEVICE_BIG_ENDIAN:
3382 stl_be_p(ptr, val);
3383 break;
3384 default:
3385 stl_p(ptr, val);
3386 break;
3388 invalidate_and_set_dirty(mr, addr1, 4);
3389 r = MEMTX_OK;
3391 if (result) {
3392 *result = r;
3394 if (release_lock) {
3395 qemu_mutex_unlock_iothread();
3397 rcu_read_unlock();
3400 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3401 MemTxAttrs attrs, MemTxResult *result)
3403 address_space_stl_internal(as, addr, val, attrs, result,
3404 DEVICE_NATIVE_ENDIAN);
3407 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3408 MemTxAttrs attrs, MemTxResult *result)
3410 address_space_stl_internal(as, addr, val, attrs, result,
3411 DEVICE_LITTLE_ENDIAN);
3414 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3415 MemTxAttrs attrs, MemTxResult *result)
3417 address_space_stl_internal(as, addr, val, attrs, result,
3418 DEVICE_BIG_ENDIAN);
3421 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3423 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3426 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3428 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3431 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3433 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3436 /* XXX: optimize */
3437 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3438 MemTxAttrs attrs, MemTxResult *result)
3440 uint8_t v = val;
3441 MemTxResult r;
3443 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3444 if (result) {
3445 *result = r;
3449 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3451 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3454 /* warning: addr must be aligned */
3455 static inline void address_space_stw_internal(AddressSpace *as,
3456 hwaddr addr, uint32_t val,
3457 MemTxAttrs attrs,
3458 MemTxResult *result,
3459 enum device_endian endian)
3461 uint8_t *ptr;
3462 MemoryRegion *mr;
3463 hwaddr l = 2;
3464 hwaddr addr1;
3465 MemTxResult r;
3466 bool release_lock = false;
3468 rcu_read_lock();
3469 mr = address_space_translate(as, addr, &addr1, &l, true);
3470 if (l < 2 || !memory_access_is_direct(mr, true)) {
3471 release_lock |= prepare_mmio_access(mr);
3473 #if defined(TARGET_WORDS_BIGENDIAN)
3474 if (endian == DEVICE_LITTLE_ENDIAN) {
3475 val = bswap16(val);
3477 #else
3478 if (endian == DEVICE_BIG_ENDIAN) {
3479 val = bswap16(val);
3481 #endif
3482 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3483 } else {
3484 /* RAM case */
3485 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3486 switch (endian) {
3487 case DEVICE_LITTLE_ENDIAN:
3488 stw_le_p(ptr, val);
3489 break;
3490 case DEVICE_BIG_ENDIAN:
3491 stw_be_p(ptr, val);
3492 break;
3493 default:
3494 stw_p(ptr, val);
3495 break;
3497 invalidate_and_set_dirty(mr, addr1, 2);
3498 r = MEMTX_OK;
3500 if (result) {
3501 *result = r;
3503 if (release_lock) {
3504 qemu_mutex_unlock_iothread();
3506 rcu_read_unlock();
3509 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3510 MemTxAttrs attrs, MemTxResult *result)
3512 address_space_stw_internal(as, addr, val, attrs, result,
3513 DEVICE_NATIVE_ENDIAN);
3516 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3517 MemTxAttrs attrs, MemTxResult *result)
3519 address_space_stw_internal(as, addr, val, attrs, result,
3520 DEVICE_LITTLE_ENDIAN);
3523 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3524 MemTxAttrs attrs, MemTxResult *result)
3526 address_space_stw_internal(as, addr, val, attrs, result,
3527 DEVICE_BIG_ENDIAN);
3530 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3532 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3535 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3537 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3540 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3542 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3545 /* XXX: optimize */
3546 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3547 MemTxAttrs attrs, MemTxResult *result)
3549 MemTxResult r;
3550 val = tswap64(val);
3551 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3552 if (result) {
3553 *result = r;
3557 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3558 MemTxAttrs attrs, MemTxResult *result)
3560 MemTxResult r;
3561 val = cpu_to_le64(val);
3562 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3563 if (result) {
3564 *result = r;
3567 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3568 MemTxAttrs attrs, MemTxResult *result)
3570 MemTxResult r;
3571 val = cpu_to_be64(val);
3572 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3573 if (result) {
3574 *result = r;
3578 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3580 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3583 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3585 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3588 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3590 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3593 /* virtual memory access for debug (includes writing to ROM) */
3594 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3595 uint8_t *buf, int len, int is_write)
3597 int l;
3598 hwaddr phys_addr;
3599 target_ulong page;
3601 while (len > 0) {
3602 int asidx;
3603 MemTxAttrs attrs;
3605 page = addr & TARGET_PAGE_MASK;
3606 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3607 asidx = cpu_asidx_from_attrs(cpu, attrs);
3608 /* if no physical page mapped, return an error */
3609 if (phys_addr == -1)
3610 return -1;
3611 l = (page + TARGET_PAGE_SIZE) - addr;
3612 if (l > len)
3613 l = len;
3614 phys_addr += (addr & ~TARGET_PAGE_MASK);
3615 if (is_write) {
3616 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3617 phys_addr, buf, l);
3618 } else {
3619 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3620 MEMTXATTRS_UNSPECIFIED,
3621 buf, l, 0);
3623 len -= l;
3624 buf += l;
3625 addr += l;
3627 return 0;
3631 * Allows code that needs to deal with migration bitmaps etc to still be built
3632 * target independent.
3634 size_t qemu_target_page_bits(void)
3636 return TARGET_PAGE_BITS;
3639 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
    /* Decided at build time by TARGET_WORDS_BIGENDIAN. */
#if !defined(TARGET_WORDS_BIGENDIAN)
    return false;
#else
    return true;
#endif
}
3655 #ifndef CONFIG_USER_ONLY
3656 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3658 MemoryRegion*mr;
3659 hwaddr l = 1;
3660 bool res;
3662 rcu_read_lock();
3663 mr = address_space_translate(&address_space_memory,
3664 phys_addr, &phys_addr, &l, false);
3666 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3667 rcu_read_unlock();
3668 return res;
3671 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3673 RAMBlock *block;
3674 int ret = 0;
3676 rcu_read_lock();
3677 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3678 ret = func(block->idstr, block->host, block->offset,
3679 block->used_length, opaque);
3680 if (ret) {
3681 break;
3684 rcu_read_unlock();
3685 return ret;
3687 #endif