exec: Do vmstate unregistration from cpu_exec_exit()
[qemu.git] / exec.c
blob7261172929c6508c80c46fdc289c1cd87f62fe86
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #include <sys/mman.h>
23 #endif
25 #include "qemu/cutils.h"
26 #include "cpu.h"
27 #include "exec/exec-all.h"
28 #include "tcg.h"
29 #include "hw/qdev-core.h"
30 #if !defined(CONFIG_USER_ONLY)
31 #include "hw/boards.h"
32 #include "hw/xen/xen.h"
33 #endif
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include <qemu.h>
41 #else /* !CONFIG_USER_ONLY */
42 #include "hw/hw.h"
43 #include "exec/memory.h"
44 #include "exec/ioport.h"
45 #include "sysemu/dma.h"
46 #include "exec/address-spaces.h"
47 #include "sysemu/xen-mapcache.h"
48 #include "trace.h"
49 #endif
50 #include "exec/cpu-all.h"
51 #include "qemu/rcu_queue.h"
52 #include "qemu/main-loop.h"
53 #include "translate-all.h"
54 #include "sysemu/replay.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
58 #include "exec/log.h"
60 #include "migration/vmstate.h"
62 #include "qemu/range.h"
63 #ifndef _WIN32
64 #include "qemu/mmap-alloc.h"
65 #endif
67 //#define DEBUG_SUBPAGE
69 #if !defined(CONFIG_USER_ONLY)
70 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
71 * are protected by the ramlist lock.
73 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
75 static MemoryRegion *system_memory;
76 static MemoryRegion *system_io;
78 AddressSpace address_space_io;
79 AddressSpace address_space_memory;
81 MemoryRegion io_mem_rom, io_mem_notdirty;
82 static MemoryRegion io_mem_unassigned;
84 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
85 #define RAM_PREALLOC (1 << 0)
87 /* RAM is mmap-ed with MAP_SHARED */
88 #define RAM_SHARED (1 << 1)
90 /* Only a portion of RAM (used_length) is actually used, and migrated.
91 * This used_length size can change across reboots.
93 #define RAM_RESIZEABLE (1 << 2)
95 #endif
97 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
98 /* current CPU in the current thread. It is only valid inside
99 cpu_exec() */
100 __thread CPUState *current_cpu;
101 /* 0 = Do not count executed instructions.
102 1 = Precise instruction counting.
103 2 = Adaptive rate instruction counting. */
104 int use_icount;
106 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One entry of the physical page map: a 6-bit skip count plus a 26-bit
 * index, as two bit-fields of a uint32_t.
 */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* Index into phys_sections (!skip) or phys_map_nodes (skip). */
    uint32_t ptr : 26;
};
117 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
119 /* Size of the L2 (and L3, etc) page tables. */
120 #define ADDR_SPACE_BITS 64
122 #define P_L2_BITS 9
123 #define P_L2_SIZE (1 << P_L2_BITS)
125 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
127 typedef PhysPageEntry Node[P_L2_SIZE];
129 typedef struct PhysPageMap {
130 struct rcu_head rcu;
132 unsigned sections_nb;
133 unsigned sections_nb_alloc;
134 unsigned nodes_nb;
135 unsigned nodes_nb_alloc;
136 Node *nodes;
137 MemoryRegionSection *sections;
138 } PhysPageMap;
140 struct AddressSpaceDispatch {
141 struct rcu_head rcu;
143 MemoryRegionSection *mru_section;
144 /* This is a multi-level map on the physical address space.
145 * The bottom level has pointers to MemoryRegionSections.
147 PhysPageEntry phys_map;
148 PhysPageMap map;
149 AddressSpace *as;
152 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
153 typedef struct subpage_t {
154 MemoryRegion iomem;
155 AddressSpace *as;
156 hwaddr base;
157 uint16_t sub_section[TARGET_PAGE_SIZE];
158 } subpage_t;
160 #define PHYS_SECTION_UNASSIGNED 0
161 #define PHYS_SECTION_NOTDIRTY 1
162 #define PHYS_SECTION_ROM 2
163 #define PHYS_SECTION_WATCH 3
165 static void io_mem_init(void);
166 static void memory_map_init(void);
167 static void tcg_commit(MemoryListener *listener);
169 static MemoryRegion io_mem_watch;
172 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
173 * @cpu: the CPU whose AddressSpace this is
174 * @as: the AddressSpace itself
175 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
176 * @tcg_as_listener: listener for tracking changes to the AddressSpace
178 struct CPUAddressSpace {
179 CPUState *cpu;
180 AddressSpace *as;
181 struct AddressSpaceDispatch *memory_dispatch;
182 MemoryListener tcg_as_listener;
185 #endif
187 #if !defined(CONFIG_USER_ONLY)
189 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
191 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
192 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
193 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
194 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
198 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
200 unsigned i;
201 uint32_t ret;
202 PhysPageEntry e;
203 PhysPageEntry *p;
205 ret = map->nodes_nb++;
206 p = map->nodes[ret];
207 assert(ret != PHYS_MAP_NODE_NIL);
208 assert(ret != map->nodes_nb_alloc);
210 e.skip = leaf ? 0 : 1;
211 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
212 for (i = 0; i < P_L2_SIZE; ++i) {
213 memcpy(&p[i], &e, sizeof(e));
215 return ret;
218 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
219 hwaddr *index, hwaddr *nb, uint16_t leaf,
220 int level)
222 PhysPageEntry *p;
223 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
225 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
226 lp->ptr = phys_map_node_alloc(map, level == 0);
228 p = map->nodes[lp->ptr];
229 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
231 while (*nb && lp < &p[P_L2_SIZE]) {
232 if ((*index & (step - 1)) == 0 && *nb >= step) {
233 lp->skip = 0;
234 lp->ptr = leaf;
235 *index += step;
236 *nb -= step;
237 } else {
238 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
240 ++lp;
244 static void phys_page_set(AddressSpaceDispatch *d,
245 hwaddr index, hwaddr nb,
246 uint16_t leaf)
248 /* Wildly overreserve - it doesn't matter much. */
249 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
251 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
254 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
255 * and update our entry so we can skip it and go directly to the destination.
257 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
259 unsigned valid_ptr = P_L2_SIZE;
260 int valid = 0;
261 PhysPageEntry *p;
262 int i;
264 if (lp->ptr == PHYS_MAP_NODE_NIL) {
265 return;
268 p = nodes[lp->ptr];
269 for (i = 0; i < P_L2_SIZE; i++) {
270 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
271 continue;
274 valid_ptr = i;
275 valid++;
276 if (p[i].skip) {
277 phys_page_compact(&p[i], nodes, compacted);
281 /* We can only compress if there's only one child. */
282 if (valid != 1) {
283 return;
286 assert(valid_ptr < P_L2_SIZE);
288 /* Don't compress if it won't fit in the # of bits we have. */
289 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
290 return;
293 lp->ptr = p[valid_ptr].ptr;
294 if (!p[valid_ptr].skip) {
295 /* If our only child is a leaf, make this a leaf. */
296 /* By design, we should have made this node a leaf to begin with so we
297 * should never reach here.
298 * But since it's so simple to handle this, let's do it just in case we
299 * change this rule.
301 lp->skip = 0;
302 } else {
303 lp->skip += p[valid_ptr].skip;
307 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
309 DECLARE_BITMAP(compacted, nodes_nb);
311 if (d->phys_map.skip) {
312 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
316 static inline bool section_covers_addr(const MemoryRegionSection *section,
317 hwaddr addr)
319 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
320 * the section must cover the entire address space.
322 return section->size.hi ||
323 range_covers_byte(section->offset_within_address_space,
324 section->size.lo, addr);
327 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
328 Node *nodes, MemoryRegionSection *sections)
330 PhysPageEntry *p;
331 hwaddr index = addr >> TARGET_PAGE_BITS;
332 int i;
334 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
335 if (lp.ptr == PHYS_MAP_NODE_NIL) {
336 return &sections[PHYS_SECTION_UNASSIGNED];
338 p = nodes[lp.ptr];
339 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
342 if (section_covers_addr(&sections[lp.ptr], addr)) {
343 return &sections[lp.ptr];
344 } else {
345 return &sections[PHYS_SECTION_UNASSIGNED];
349 bool memory_region_is_unassigned(MemoryRegion *mr)
351 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
352 && mr != &io_mem_watch;
355 /* Called from RCU critical section */
356 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
357 hwaddr addr,
358 bool resolve_subpage)
360 MemoryRegionSection *section = atomic_read(&d->mru_section);
361 subpage_t *subpage;
362 bool update;
364 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
365 section_covers_addr(section, addr)) {
366 update = false;
367 } else {
368 section = phys_page_find(d->phys_map, addr, d->map.nodes,
369 d->map.sections);
370 update = true;
372 if (resolve_subpage && section->mr->subpage) {
373 subpage = container_of(section->mr, subpage_t, iomem);
374 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
376 if (update) {
377 atomic_set(&d->mru_section, section);
379 return section;
382 /* Called from RCU critical section */
383 static MemoryRegionSection *
384 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
385 hwaddr *plen, bool resolve_subpage)
387 MemoryRegionSection *section;
388 MemoryRegion *mr;
389 Int128 diff;
391 section = address_space_lookup_region(d, addr, resolve_subpage);
392 /* Compute offset within MemoryRegionSection */
393 addr -= section->offset_within_address_space;
395 /* Compute offset within MemoryRegion */
396 *xlat = addr + section->offset_within_region;
398 mr = section->mr;
400 /* MMIO registers can be expected to perform full-width accesses based only
401 * on their address, without considering adjacent registers that could
402 * decode to completely different MemoryRegions. When such registers
403 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
404 * regions overlap wildly. For this reason we cannot clamp the accesses
405 * here.
407 * If the length is small (as is the case for address_space_ldl/stl),
408 * everything works fine. If the incoming length is large, however,
409 * the caller really has to do the clamping through memory_access_size.
411 if (memory_region_is_ram(mr)) {
412 diff = int128_sub(section->size, int128_make64(addr));
413 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
415 return section;
418 /* Called from RCU critical section */
419 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
420 hwaddr *xlat, hwaddr *plen,
421 bool is_write)
423 IOMMUTLBEntry iotlb;
424 MemoryRegionSection *section;
425 MemoryRegion *mr;
427 for (;;) {
428 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
429 section = address_space_translate_internal(d, addr, &addr, plen, true);
430 mr = section->mr;
432 if (!mr->iommu_ops) {
433 break;
436 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
437 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
438 | (addr & iotlb.addr_mask));
439 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
440 if (!(iotlb.perm & (1 << is_write))) {
441 mr = &io_mem_unassigned;
442 break;
445 as = iotlb.target_as;
448 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
449 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
450 *plen = MIN(page, *plen);
453 *xlat = addr;
454 return mr;
457 /* Called from RCU critical section */
458 MemoryRegionSection *
459 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
460 hwaddr *xlat, hwaddr *plen)
462 MemoryRegionSection *section;
463 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
465 section = address_space_translate_internal(d, addr, xlat, plen, false);
467 assert(!section->mr->iommu_ops);
468 return section;
470 #endif
472 #if !defined(CONFIG_USER_ONLY)
474 static int cpu_common_post_load(void *opaque, int version_id)
476 CPUState *cpu = opaque;
478 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
479 version_id is increased. */
480 cpu->interrupt_request &= ~0x01;
481 tlb_flush(cpu, 1);
483 return 0;
486 static int cpu_common_pre_load(void *opaque)
488 CPUState *cpu = opaque;
490 cpu->exception_index = -1;
492 return 0;
495 static bool cpu_common_exception_index_needed(void *opaque)
497 CPUState *cpu = opaque;
499 return tcg_enabled() && cpu->exception_index != -1;
502 static const VMStateDescription vmstate_cpu_common_exception_index = {
503 .name = "cpu_common/exception_index",
504 .version_id = 1,
505 .minimum_version_id = 1,
506 .needed = cpu_common_exception_index_needed,
507 .fields = (VMStateField[]) {
508 VMSTATE_INT32(exception_index, CPUState),
509 VMSTATE_END_OF_LIST()
513 static bool cpu_common_crash_occurred_needed(void *opaque)
515 CPUState *cpu = opaque;
517 return cpu->crash_occurred;
520 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
521 .name = "cpu_common/crash_occurred",
522 .version_id = 1,
523 .minimum_version_id = 1,
524 .needed = cpu_common_crash_occurred_needed,
525 .fields = (VMStateField[]) {
526 VMSTATE_BOOL(crash_occurred, CPUState),
527 VMSTATE_END_OF_LIST()
531 const VMStateDescription vmstate_cpu_common = {
532 .name = "cpu_common",
533 .version_id = 1,
534 .minimum_version_id = 1,
535 .pre_load = cpu_common_pre_load,
536 .post_load = cpu_common_post_load,
537 .fields = (VMStateField[]) {
538 VMSTATE_UINT32(halted, CPUState),
539 VMSTATE_UINT32(interrupt_request, CPUState),
540 VMSTATE_END_OF_LIST()
542 .subsections = (const VMStateDescription*[]) {
543 &vmstate_cpu_common_exception_index,
544 &vmstate_cpu_common_crash_occurred,
545 NULL
549 #endif
551 CPUState *qemu_get_cpu(int index)
553 CPUState *cpu;
555 CPU_FOREACH(cpu) {
556 if (cpu->cpu_index == index) {
557 return cpu;
561 return NULL;
564 #if !defined(CONFIG_USER_ONLY)
565 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
567 CPUAddressSpace *newas;
569 /* Target code should have set num_ases before calling us */
570 assert(asidx < cpu->num_ases);
572 if (asidx == 0) {
573 /* address space 0 gets the convenience alias */
574 cpu->as = as;
577 /* KVM cannot currently support multiple address spaces. */
578 assert(asidx == 0 || !kvm_enabled());
580 if (!cpu->cpu_ases) {
581 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
584 newas = &cpu->cpu_ases[asidx];
585 newas->cpu = cpu;
586 newas->as = as;
587 if (tcg_enabled()) {
588 newas->tcg_as_listener.commit = tcg_commit;
589 memory_listener_register(&newas->tcg_as_listener, as);
593 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
595 /* Return the AddressSpace corresponding to the specified index */
596 return cpu->cpu_ases[asidx].as;
598 #endif
600 #ifndef CONFIG_USER_ONLY
601 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
603 static int cpu_get_free_index(Error **errp)
605 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
607 if (cpu >= MAX_CPUMASK_BITS) {
608 error_setg(errp, "Trying to use more CPUs than max of %d",
609 MAX_CPUMASK_BITS);
610 return -1;
613 bitmap_set(cpu_index_map, cpu, 1);
614 return cpu;
617 static void cpu_release_index(CPUState *cpu)
619 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
621 #else
623 static int cpu_get_free_index(Error **errp)
625 CPUState *some_cpu;
626 int cpu_index = 0;
628 CPU_FOREACH(some_cpu) {
629 cpu_index++;
631 return cpu_index;
634 static void cpu_release_index(CPUState *cpu)
636 return;
638 #endif
640 void cpu_exec_exit(CPUState *cpu)
642 CPUClass *cc = CPU_GET_CLASS(cpu);
644 #if defined(CONFIG_USER_ONLY)
645 cpu_list_lock();
646 #endif
647 if (cpu->cpu_index == -1) {
648 /* cpu_index was never allocated by this @cpu or was already freed. */
649 #if defined(CONFIG_USER_ONLY)
650 cpu_list_unlock();
651 #endif
652 return;
655 QTAILQ_REMOVE(&cpus, cpu, node);
656 cpu_release_index(cpu);
657 cpu->cpu_index = -1;
658 #if defined(CONFIG_USER_ONLY)
659 cpu_list_unlock();
660 #endif
662 if (cc->vmsd != NULL) {
663 vmstate_unregister(NULL, cc->vmsd, cpu);
665 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
666 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
670 void cpu_exec_init(CPUState *cpu, Error **errp)
672 CPUClass *cc = CPU_GET_CLASS(cpu);
673 Error *local_err = NULL;
675 cpu->as = NULL;
676 cpu->num_ases = 0;
678 #ifndef CONFIG_USER_ONLY
679 cpu->thread_id = qemu_get_thread_id();
681 /* This is a softmmu CPU object, so create a property for it
682 * so users can wire up its memory. (This can't go in qom/cpu.c
683 * because that file is compiled only once for both user-mode
684 * and system builds.) The default if no link is set up is to use
685 * the system address space.
687 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
688 (Object **)&cpu->memory,
689 qdev_prop_allow_set_link_before_realize,
690 OBJ_PROP_LINK_UNREF_ON_RELEASE,
691 &error_abort);
692 cpu->memory = system_memory;
693 object_ref(OBJECT(cpu->memory));
694 #endif
696 #if defined(CONFIG_USER_ONLY)
697 cpu_list_lock();
698 #endif
699 cpu->cpu_index = cpu_get_free_index(&local_err);
700 if (local_err) {
701 error_propagate(errp, local_err);
702 #if defined(CONFIG_USER_ONLY)
703 cpu_list_unlock();
704 #endif
705 return;
707 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
708 #if defined(CONFIG_USER_ONLY)
709 (void) cc;
710 cpu_list_unlock();
711 #else
712 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
713 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
715 if (cc->vmsd != NULL) {
716 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
718 #endif
721 #if defined(CONFIG_USER_ONLY)
722 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
724 tb_invalidate_phys_page_range(pc, pc + 1, 0);
726 #else
727 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
729 MemTxAttrs attrs;
730 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
731 int asidx = cpu_asidx_from_attrs(cpu, attrs);
732 if (phys != -1) {
733 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
734 phys | (pc & ~TARGET_PAGE_MASK));
737 #endif
739 #if defined(CONFIG_USER_ONLY)
740 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
745 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
746 int flags)
748 return -ENOSYS;
751 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
755 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
756 int flags, CPUWatchpoint **watchpoint)
758 return -ENOSYS;
760 #else
761 /* Add a watchpoint. */
762 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
763 int flags, CPUWatchpoint **watchpoint)
765 CPUWatchpoint *wp;
767 /* forbid ranges which are empty or run off the end of the address space */
768 if (len == 0 || (addr + len - 1) < addr) {
769 error_report("tried to set invalid watchpoint at %"
770 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
771 return -EINVAL;
773 wp = g_malloc(sizeof(*wp));
775 wp->vaddr = addr;
776 wp->len = len;
777 wp->flags = flags;
779 /* keep all GDB-injected watchpoints in front */
780 if (flags & BP_GDB) {
781 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
782 } else {
783 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
786 tlb_flush_page(cpu, addr);
788 if (watchpoint)
789 *watchpoint = wp;
790 return 0;
793 /* Remove a specific watchpoint. */
794 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
795 int flags)
797 CPUWatchpoint *wp;
799 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
800 if (addr == wp->vaddr && len == wp->len
801 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
802 cpu_watchpoint_remove_by_ref(cpu, wp);
803 return 0;
806 return -ENOENT;
809 /* Remove a specific watchpoint by reference. */
810 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
812 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
814 tlb_flush_page(cpu, watchpoint->vaddr);
816 g_free(watchpoint);
819 /* Remove all matching watchpoints. */
820 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
822 CPUWatchpoint *wp, *next;
824 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
825 if (wp->flags & mask) {
826 cpu_watchpoint_remove_by_ref(cpu, wp);
831 /* Return true if this watchpoint address matches the specified
832 * access (ie the address range covered by the watchpoint overlaps
833 * partially or completely with the address range covered by the
834 * access).
836 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
837 vaddr addr,
838 vaddr len)
840 /* We know the lengths are non-zero, but a little caution is
841 * required to avoid errors in the case where the range ends
842 * exactly at the top of the address space and so addr + len
843 * wraps round to zero.
845 vaddr wpend = wp->vaddr + wp->len - 1;
846 vaddr addrend = addr + len - 1;
848 return !(addr > wpend || wp->vaddr > addrend);
851 #endif
853 /* Add a breakpoint. */
854 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
855 CPUBreakpoint **breakpoint)
857 CPUBreakpoint *bp;
859 bp = g_malloc(sizeof(*bp));
861 bp->pc = pc;
862 bp->flags = flags;
864 /* keep all GDB-injected breakpoints in front */
865 if (flags & BP_GDB) {
866 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
867 } else {
868 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
871 breakpoint_invalidate(cpu, pc);
873 if (breakpoint) {
874 *breakpoint = bp;
876 return 0;
879 /* Remove a specific breakpoint. */
880 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
882 CPUBreakpoint *bp;
884 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
885 if (bp->pc == pc && bp->flags == flags) {
886 cpu_breakpoint_remove_by_ref(cpu, bp);
887 return 0;
890 return -ENOENT;
893 /* Remove a specific breakpoint by reference. */
894 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
896 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
898 breakpoint_invalidate(cpu, breakpoint->pc);
900 g_free(breakpoint);
903 /* Remove all matching breakpoints. */
904 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
906 CPUBreakpoint *bp, *next;
908 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
909 if (bp->flags & mask) {
910 cpu_breakpoint_remove_by_ref(cpu, bp);
915 /* enable or disable single step mode. EXCP_DEBUG is returned by the
916 CPU loop after each instruction */
917 void cpu_single_step(CPUState *cpu, int enabled)
919 if (cpu->singlestep_enabled != enabled) {
920 cpu->singlestep_enabled = enabled;
921 if (kvm_enabled()) {
922 kvm_update_guest_debug(cpu, 0);
923 } else {
924 /* must flush all the translated code to avoid inconsistencies */
925 /* XXX: only flush what is necessary */
926 tb_flush(cpu);
931 void cpu_abort(CPUState *cpu, const char *fmt, ...)
933 va_list ap;
934 va_list ap2;
936 va_start(ap, fmt);
937 va_copy(ap2, ap);
938 fprintf(stderr, "qemu: fatal: ");
939 vfprintf(stderr, fmt, ap);
940 fprintf(stderr, "\n");
941 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
942 if (qemu_log_separate()) {
943 qemu_log("qemu: fatal: ");
944 qemu_log_vprintf(fmt, ap2);
945 qemu_log("\n");
946 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
947 qemu_log_flush();
948 qemu_log_close();
950 va_end(ap2);
951 va_end(ap);
952 replay_finish();
953 #if defined(CONFIG_USER_ONLY)
955 struct sigaction act;
956 sigfillset(&act.sa_mask);
957 act.sa_handler = SIG_DFL;
958 sigaction(SIGABRT, &act, NULL);
960 #endif
961 abort();
964 #if !defined(CONFIG_USER_ONLY)
965 /* Called from RCU critical section */
966 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
968 RAMBlock *block;
970 block = atomic_rcu_read(&ram_list.mru_block);
971 if (block && addr - block->offset < block->max_length) {
972 return block;
974 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
975 if (addr - block->offset < block->max_length) {
976 goto found;
980 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
981 abort();
983 found:
984 /* It is safe to write mru_block outside the iothread lock. This
985 * is what happens:
987 * mru_block = xxx
988 * rcu_read_unlock()
989 * xxx removed from list
990 * rcu_read_lock()
991 * read mru_block
992 * mru_block = NULL;
993 * call_rcu(reclaim_ramblock, xxx);
994 * rcu_read_unlock()
996 * atomic_rcu_set is not needed here. The block was already published
997 * when it was placed into the list. Here we're just making an extra
998 * copy of the pointer.
1000 ram_list.mru_block = block;
1001 return block;
1004 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
1006 CPUState *cpu;
1007 ram_addr_t start1;
1008 RAMBlock *block;
1009 ram_addr_t end;
1011 end = TARGET_PAGE_ALIGN(start + length);
1012 start &= TARGET_PAGE_MASK;
1014 rcu_read_lock();
1015 block = qemu_get_ram_block(start);
1016 assert(block == qemu_get_ram_block(end - 1));
1017 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
1018 CPU_FOREACH(cpu) {
1019 tlb_reset_dirty(cpu, start1, length);
1021 rcu_read_unlock();
1024 /* Note: start and end must be within the same ram block. */
1025 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
1026 ram_addr_t length,
1027 unsigned client)
1029 DirtyMemoryBlocks *blocks;
1030 unsigned long end, page;
1031 bool dirty = false;
1033 if (length == 0) {
1034 return false;
1037 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1038 page = start >> TARGET_PAGE_BITS;
1040 rcu_read_lock();
1042 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1044 while (page < end) {
1045 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1046 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1047 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1049 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1050 offset, num);
1051 page += num;
1054 rcu_read_unlock();
1056 if (dirty && tcg_enabled()) {
1057 tlb_reset_dirty_range_all(start, length);
1060 return dirty;
1063 /* Called from RCU critical section */
1064 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1065 MemoryRegionSection *section,
1066 target_ulong vaddr,
1067 hwaddr paddr, hwaddr xlat,
1068 int prot,
1069 target_ulong *address)
1071 hwaddr iotlb;
1072 CPUWatchpoint *wp;
1074 if (memory_region_is_ram(section->mr)) {
1075 /* Normal RAM. */
1076 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1077 if (!section->readonly) {
1078 iotlb |= PHYS_SECTION_NOTDIRTY;
1079 } else {
1080 iotlb |= PHYS_SECTION_ROM;
1082 } else {
1083 AddressSpaceDispatch *d;
1085 d = atomic_rcu_read(&section->address_space->dispatch);
1086 iotlb = section - d->map.sections;
1087 iotlb += xlat;
1090 /* Make accesses to pages with watchpoints go via the
1091 watchpoint trap routines. */
1092 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1093 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1094 /* Avoid trapping reads of pages with a write breakpoint. */
1095 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1096 iotlb = PHYS_SECTION_WATCH + paddr;
1097 *address |= TLB_MMIO;
1098 break;
1103 return iotlb;
1105 #endif /* defined(CONFIG_USER_ONLY) */
1107 #if !defined(CONFIG_USER_ONLY)
1109 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1110 uint16_t section);
1111 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1113 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1114 qemu_anon_ram_alloc;
1117 * Set a custom physical guest memory alloator.
1118 * Accelerators with unusual needs may need this. Hopefully, we can
1119 * get rid of it eventually.
1121 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1123 phys_mem_alloc = alloc;
1126 static uint16_t phys_section_add(PhysPageMap *map,
1127 MemoryRegionSection *section)
1129 /* The physical section number is ORed with a page-aligned
1130 * pointer to produce the iotlb entries. Thus it should
1131 * never overflow into the page-aligned value.
1133 assert(map->sections_nb < TARGET_PAGE_SIZE);
1135 if (map->sections_nb == map->sections_nb_alloc) {
1136 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1137 map->sections = g_renew(MemoryRegionSection, map->sections,
1138 map->sections_nb_alloc);
1140 map->sections[map->sections_nb] = *section;
1141 memory_region_ref(section->mr);
1142 return map->sections_nb++;
1145 static void phys_section_destroy(MemoryRegion *mr)
1147 bool have_sub_page = mr->subpage;
1149 memory_region_unref(mr);
1151 if (have_sub_page) {
1152 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1153 object_unref(OBJECT(&subpage->iomem));
1154 g_free(subpage);
1158 static void phys_sections_free(PhysPageMap *map)
1160 while (map->sections_nb > 0) {
1161 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1162 phys_section_destroy(section->mr);
1164 g_free(map->sections);
1165 g_free(map->nodes);
1168 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1170 subpage_t *subpage;
1171 hwaddr base = section->offset_within_address_space
1172 & TARGET_PAGE_MASK;
1173 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1174 d->map.nodes, d->map.sections);
1175 MemoryRegionSection subsection = {
1176 .offset_within_address_space = base,
1177 .size = int128_make64(TARGET_PAGE_SIZE),
1179 hwaddr start, end;
1181 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1183 if (!(existing->mr->subpage)) {
1184 subpage = subpage_init(d->as, base);
1185 subsection.address_space = d->as;
1186 subsection.mr = &subpage->iomem;
1187 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1188 phys_section_add(&d->map, &subsection));
1189 } else {
1190 subpage = container_of(existing->mr, subpage_t, iomem);
1192 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1193 end = start + int128_get64(section->size) - 1;
1194 subpage_register(subpage, start, end,
1195 phys_section_add(&d->map, section));
1199 static void register_multipage(AddressSpaceDispatch *d,
1200 MemoryRegionSection *section)
1202 hwaddr start_addr = section->offset_within_address_space;
1203 uint16_t section_index = phys_section_add(&d->map, section);
1204 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1205 TARGET_PAGE_BITS));
1207 assert(num_pages);
1208 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1211 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1213 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1214 AddressSpaceDispatch *d = as->next_dispatch;
1215 MemoryRegionSection now = *section, remain = *section;
1216 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1218 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1219 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1220 - now.offset_within_address_space;
1222 now.size = int128_min(int128_make64(left), now.size);
1223 register_subpage(d, &now);
1224 } else {
1225 now.size = int128_zero();
1227 while (int128_ne(remain.size, now.size)) {
1228 remain.size = int128_sub(remain.size, now.size);
1229 remain.offset_within_address_space += int128_get64(now.size);
1230 remain.offset_within_region += int128_get64(now.size);
1231 now = remain;
1232 if (int128_lt(remain.size, page_size)) {
1233 register_subpage(d, &now);
1234 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1235 now.size = page_size;
1236 register_subpage(d, &now);
1237 } else {
1238 now.size = int128_and(now.size, int128_neg(page_size));
1239 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; only does anything when KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1250 void qemu_mutex_lock_ramlist(void)
1252 qemu_mutex_lock(&ram_list.mutex);
1255 void qemu_mutex_unlock_ramlist(void)
1257 qemu_mutex_unlock(&ram_list.mutex);
1260 #ifdef __linux__
1261 static void *file_ram_alloc(RAMBlock *block,
1262 ram_addr_t memory,
1263 const char *path,
1264 Error **errp)
1266 bool unlink_on_error = false;
1267 char *filename;
1268 char *sanitized_name;
1269 char *c;
1270 void *area;
1271 int fd = -1;
1272 int64_t page_size;
1274 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1275 error_setg(errp,
1276 "host lacks kvm mmu notifiers, -mem-path unsupported");
1277 return NULL;
1280 for (;;) {
1281 fd = open(path, O_RDWR);
1282 if (fd >= 0) {
1283 /* @path names an existing file, use it */
1284 break;
1286 if (errno == ENOENT) {
1287 /* @path names a file that doesn't exist, create it */
1288 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1289 if (fd >= 0) {
1290 unlink_on_error = true;
1291 break;
1293 } else if (errno == EISDIR) {
1294 /* @path names a directory, create a file there */
1295 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1296 sanitized_name = g_strdup(memory_region_name(block->mr));
1297 for (c = sanitized_name; *c != '\0'; c++) {
1298 if (*c == '/') {
1299 *c = '_';
1303 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1304 sanitized_name);
1305 g_free(sanitized_name);
1307 fd = mkstemp(filename);
1308 if (fd >= 0) {
1309 unlink(filename);
1310 g_free(filename);
1311 break;
1313 g_free(filename);
1315 if (errno != EEXIST && errno != EINTR) {
1316 error_setg_errno(errp, errno,
1317 "can't open backing store %s for guest RAM",
1318 path);
1319 goto error;
1322 * Try again on EINTR and EEXIST. The latter happens when
1323 * something else creates the file between our two open().
1327 page_size = qemu_fd_getpagesize(fd);
1328 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1330 if (memory < page_size) {
1331 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1332 "or larger than page size 0x%" PRIx64,
1333 memory, page_size);
1334 goto error;
1337 memory = ROUND_UP(memory, page_size);
1340 * ftruncate is not supported by hugetlbfs in older
1341 * hosts, so don't bother bailing out on errors.
1342 * If anything goes wrong with it under other filesystems,
1343 * mmap will fail.
1345 if (ftruncate(fd, memory)) {
1346 perror("ftruncate");
1349 area = qemu_ram_mmap(fd, memory, block->mr->align,
1350 block->flags & RAM_SHARED);
1351 if (area == MAP_FAILED) {
1352 error_setg_errno(errp, errno,
1353 "unable to map backing store for guest RAM");
1354 goto error;
1357 if (mem_prealloc) {
1358 os_mem_prealloc(fd, area, memory);
1361 block->fd = fd;
1362 return area;
1364 error:
1365 if (unlink_on_error) {
1366 unlink(path);
1368 if (fd != -1) {
1369 close(fd);
1371 return NULL;
1373 #endif
1375 /* Called with the ramlist lock held. */
1376 static ram_addr_t find_ram_offset(ram_addr_t size)
1378 RAMBlock *block, *next_block;
1379 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1381 assert(size != 0); /* it would hand out same offset multiple times */
1383 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1384 return 0;
1387 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1388 ram_addr_t end, next = RAM_ADDR_MAX;
1390 end = block->offset + block->max_length;
1392 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1393 if (next_block->offset >= end) {
1394 next = MIN(next, next_block->offset);
1397 if (next - end >= size && next - end < mingap) {
1398 offset = end;
1399 mingap = next - end;
1403 if (offset == RAM_ADDR_MAX) {
1404 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1405 (uint64_t)size);
1406 abort();
1409 return offset;
1412 ram_addr_t last_ram_offset(void)
1414 RAMBlock *block;
1415 ram_addr_t last = 0;
1417 rcu_read_lock();
1418 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1419 last = MAX(last, block->offset + block->max_length);
1421 rcu_read_unlock();
1422 return last;
1425 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1427 int ret;
1429 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1430 if (!machine_dump_guest_core(current_machine)) {
1431 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1432 if (ret) {
1433 perror("qemu_madvise");
1434 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1435 "but dump_guest_core=off specified\n");
1440 const char *qemu_ram_get_idstr(RAMBlock *rb)
1442 return rb->idstr;
1445 /* Called with iothread lock held. */
1446 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1448 RAMBlock *block;
1450 assert(new_block);
1451 assert(!new_block->idstr[0]);
1453 if (dev) {
1454 char *id = qdev_get_dev_path(dev);
1455 if (id) {
1456 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1457 g_free(id);
1460 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1462 rcu_read_lock();
1463 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1464 if (block != new_block &&
1465 !strcmp(block->idstr, new_block->idstr)) {
1466 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1467 new_block->idstr);
1468 abort();
1471 rcu_read_unlock();
1474 /* Called with iothread lock held. */
1475 void qemu_ram_unset_idstr(RAMBlock *block)
1477 /* FIXME: arch_init.c assumes that this is not called throughout
1478 * migration. Ignore the problem since hot-unplug during migration
1479 * does not work anyway.
1481 if (block) {
1482 memset(block->idstr, 0, sizeof(block->idstr));
1486 static int memory_try_enable_merging(void *addr, size_t len)
1488 if (!machine_mem_merge(current_machine)) {
1489 /* disabled by the user */
1490 return 0;
1493 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1496 /* Only legal before guest might have detected the memory size: e.g. on
1497 * incoming migration, or right after reset.
1499 * As memory core doesn't know how is memory accessed, it is up to
1500 * resize callback to update device state and/or add assertions to detect
1501 * misuse, if necessary.
1503 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1505 assert(block);
1507 newsize = HOST_PAGE_ALIGN(newsize);
1509 if (block->used_length == newsize) {
1510 return 0;
1513 if (!(block->flags & RAM_RESIZEABLE)) {
1514 error_setg_errno(errp, EINVAL,
1515 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1516 " in != 0x" RAM_ADDR_FMT, block->idstr,
1517 newsize, block->used_length);
1518 return -EINVAL;
1521 if (block->max_length < newsize) {
1522 error_setg_errno(errp, EINVAL,
1523 "Length too large: %s: 0x" RAM_ADDR_FMT
1524 " > 0x" RAM_ADDR_FMT, block->idstr,
1525 newsize, block->max_length);
1526 return -EINVAL;
1529 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1530 block->used_length = newsize;
1531 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1532 DIRTY_CLIENTS_ALL);
1533 memory_region_set_size(block->mr, newsize);
1534 if (block->resized) {
1535 block->resized(block->idstr, newsize, block->host);
1537 return 0;
1540 /* Called with ram_list.mutex held */
1541 static void dirty_memory_extend(ram_addr_t old_ram_size,
1542 ram_addr_t new_ram_size)
1544 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1545 DIRTY_MEMORY_BLOCK_SIZE);
1546 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1547 DIRTY_MEMORY_BLOCK_SIZE);
1548 int i;
1550 /* Only need to extend if block count increased */
1551 if (new_num_blocks <= old_num_blocks) {
1552 return;
1555 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1556 DirtyMemoryBlocks *old_blocks;
1557 DirtyMemoryBlocks *new_blocks;
1558 int j;
1560 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1561 new_blocks = g_malloc(sizeof(*new_blocks) +
1562 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1564 if (old_num_blocks) {
1565 memcpy(new_blocks->blocks, old_blocks->blocks,
1566 old_num_blocks * sizeof(old_blocks->blocks[0]));
1569 for (j = old_num_blocks; j < new_num_blocks; j++) {
1570 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1573 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1575 if (old_blocks) {
1576 g_free_rcu(old_blocks, rcu);
1581 static void ram_block_add(RAMBlock *new_block, Error **errp)
1583 RAMBlock *block;
1584 RAMBlock *last_block = NULL;
1585 ram_addr_t old_ram_size, new_ram_size;
1586 Error *err = NULL;
1588 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1590 qemu_mutex_lock_ramlist();
1591 new_block->offset = find_ram_offset(new_block->max_length);
1593 if (!new_block->host) {
1594 if (xen_enabled()) {
1595 xen_ram_alloc(new_block->offset, new_block->max_length,
1596 new_block->mr, &err);
1597 if (err) {
1598 error_propagate(errp, err);
1599 qemu_mutex_unlock_ramlist();
1600 return;
1602 } else {
1603 new_block->host = phys_mem_alloc(new_block->max_length,
1604 &new_block->mr->align);
1605 if (!new_block->host) {
1606 error_setg_errno(errp, errno,
1607 "cannot set up guest memory '%s'",
1608 memory_region_name(new_block->mr));
1609 qemu_mutex_unlock_ramlist();
1610 return;
1612 memory_try_enable_merging(new_block->host, new_block->max_length);
1616 new_ram_size = MAX(old_ram_size,
1617 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1618 if (new_ram_size > old_ram_size) {
1619 migration_bitmap_extend(old_ram_size, new_ram_size);
1620 dirty_memory_extend(old_ram_size, new_ram_size);
1622 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1623 * QLIST (which has an RCU-friendly variant) does not have insertion at
1624 * tail, so save the last element in last_block.
1626 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1627 last_block = block;
1628 if (block->max_length < new_block->max_length) {
1629 break;
1632 if (block) {
1633 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1634 } else if (last_block) {
1635 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1636 } else { /* list is empty */
1637 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1639 ram_list.mru_block = NULL;
1641 /* Write list before version */
1642 smp_wmb();
1643 ram_list.version++;
1644 qemu_mutex_unlock_ramlist();
1646 cpu_physical_memory_set_dirty_range(new_block->offset,
1647 new_block->used_length,
1648 DIRTY_CLIENTS_ALL);
1650 if (new_block->host) {
1651 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1652 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1653 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1654 if (kvm_enabled()) {
1655 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1660 #ifdef __linux__
1661 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1662 bool share, const char *mem_path,
1663 Error **errp)
1665 RAMBlock *new_block;
1666 Error *local_err = NULL;
1668 if (xen_enabled()) {
1669 error_setg(errp, "-mem-path not supported with Xen");
1670 return NULL;
1673 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1675 * file_ram_alloc() needs to allocate just like
1676 * phys_mem_alloc, but we haven't bothered to provide
1677 * a hook there.
1679 error_setg(errp,
1680 "-mem-path not supported with this accelerator");
1681 return NULL;
1684 size = HOST_PAGE_ALIGN(size);
1685 new_block = g_malloc0(sizeof(*new_block));
1686 new_block->mr = mr;
1687 new_block->used_length = size;
1688 new_block->max_length = size;
1689 new_block->flags = share ? RAM_SHARED : 0;
1690 new_block->host = file_ram_alloc(new_block, size,
1691 mem_path, errp);
1692 if (!new_block->host) {
1693 g_free(new_block);
1694 return NULL;
1697 ram_block_add(new_block, &local_err);
1698 if (local_err) {
1699 g_free(new_block);
1700 error_propagate(errp, local_err);
1701 return NULL;
1703 return new_block;
1705 #endif
1707 static
1708 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1709 void (*resized)(const char*,
1710 uint64_t length,
1711 void *host),
1712 void *host, bool resizeable,
1713 MemoryRegion *mr, Error **errp)
1715 RAMBlock *new_block;
1716 Error *local_err = NULL;
1718 size = HOST_PAGE_ALIGN(size);
1719 max_size = HOST_PAGE_ALIGN(max_size);
1720 new_block = g_malloc0(sizeof(*new_block));
1721 new_block->mr = mr;
1722 new_block->resized = resized;
1723 new_block->used_length = size;
1724 new_block->max_length = max_size;
1725 assert(max_size >= size);
1726 new_block->fd = -1;
1727 new_block->host = host;
1728 if (host) {
1729 new_block->flags |= RAM_PREALLOC;
1731 if (resizeable) {
1732 new_block->flags |= RAM_RESIZEABLE;
1734 ram_block_add(new_block, &local_err);
1735 if (local_err) {
1736 g_free(new_block);
1737 error_propagate(errp, local_err);
1738 return NULL;
1740 return new_block;
1743 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1744 MemoryRegion *mr, Error **errp)
1746 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1749 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1751 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1754 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1755 void (*resized)(const char*,
1756 uint64_t length,
1757 void *host),
1758 MemoryRegion *mr, Error **errp)
1760 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1763 static void reclaim_ramblock(RAMBlock *block)
1765 if (block->flags & RAM_PREALLOC) {
1767 } else if (xen_enabled()) {
1768 xen_invalidate_map_cache_entry(block->host);
1769 #ifndef _WIN32
1770 } else if (block->fd >= 0) {
1771 qemu_ram_munmap(block->host, block->max_length);
1772 close(block->fd);
1773 #endif
1774 } else {
1775 qemu_anon_ram_free(block->host, block->max_length);
1777 g_free(block);
1780 void qemu_ram_free(RAMBlock *block)
1782 if (!block) {
1783 return;
1786 qemu_mutex_lock_ramlist();
1787 QLIST_REMOVE_RCU(block, next);
1788 ram_list.mru_block = NULL;
1789 /* Write list before version */
1790 smp_wmb();
1791 ram_list.version++;
1792 call_rcu(block, reclaim_ramblock, rcu);
1793 qemu_mutex_unlock_ramlist();
1796 #ifndef _WIN32
1797 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1799 RAMBlock *block;
1800 ram_addr_t offset;
1801 int flags;
1802 void *area, *vaddr;
1804 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1805 offset = addr - block->offset;
1806 if (offset < block->max_length) {
1807 vaddr = ramblock_ptr(block, offset);
1808 if (block->flags & RAM_PREALLOC) {
1810 } else if (xen_enabled()) {
1811 abort();
1812 } else {
1813 flags = MAP_FIXED;
1814 if (block->fd >= 0) {
1815 flags |= (block->flags & RAM_SHARED ?
1816 MAP_SHARED : MAP_PRIVATE);
1817 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1818 flags, block->fd, offset);
1819 } else {
1821 * Remap needs to match alloc. Accelerators that
1822 * set phys_mem_alloc never remap. If they did,
1823 * we'd need a remap hook here.
1825 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1827 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1828 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1829 flags, -1, 0);
1831 if (area != vaddr) {
1832 fprintf(stderr, "Could not remap addr: "
1833 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1834 length, addr);
1835 exit(1);
1837 memory_try_enable_merging(vaddr, length);
1838 qemu_ram_setup_dump(vaddr, length);
1843 #endif /* !_WIN32 */
1845 int qemu_get_ram_fd(ram_addr_t addr)
1847 RAMBlock *block;
1848 int fd;
1850 rcu_read_lock();
1851 block = qemu_get_ram_block(addr);
1852 fd = block->fd;
1853 rcu_read_unlock();
1854 return fd;
1857 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1859 RAMBlock *block;
1861 rcu_read_lock();
1862 block = qemu_get_ram_block(addr);
1863 block->fd = fd;
1864 rcu_read_unlock();
1867 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1869 RAMBlock *block;
1870 void *ptr;
1872 rcu_read_lock();
1873 block = qemu_get_ram_block(addr);
1874 ptr = ramblock_ptr(block, 0);
1875 rcu_read_unlock();
1876 return ptr;
1879 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1880 * This should not be used for general purpose DMA. Use address_space_map
1881 * or address_space_rw instead. For local memory (e.g. video ram) that the
1882 * device owns, use memory_region_get_ram_ptr.
1884 * Called within RCU critical section.
1886 void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1888 RAMBlock *block = ram_block;
1890 if (block == NULL) {
1891 block = qemu_get_ram_block(addr);
1894 if (xen_enabled() && block->host == NULL) {
1895 /* We need to check if the requested address is in the RAM
1896 * because we don't want to map the entire memory in QEMU.
1897 * In that case just map until the end of the page.
1899 if (block->offset == 0) {
1900 return xen_map_cache(addr, 0, 0);
1903 block->host = xen_map_cache(block->offset, block->max_length, 1);
1905 return ramblock_ptr(block, addr - block->offset);
1908 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1909 * but takes a size argument.
1911 * Called within RCU critical section.
1913 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1914 hwaddr *size)
1916 RAMBlock *block = ram_block;
1917 ram_addr_t offset_inside_block;
1918 if (*size == 0) {
1919 return NULL;
1922 if (block == NULL) {
1923 block = qemu_get_ram_block(addr);
1925 offset_inside_block = addr - block->offset;
1926 *size = MIN(*size, block->max_length - offset_inside_block);
1928 if (xen_enabled() && block->host == NULL) {
1929 /* We need to check if the requested address is in the RAM
1930 * because we don't want to map the entire memory in QEMU.
1931 * In that case just map the requested area.
1933 if (block->offset == 0) {
1934 return xen_map_cache(addr, *size, 1);
1937 block->host = xen_map_cache(block->offset, block->max_length, 1);
1940 return ramblock_ptr(block, offset_inside_block);
1944 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1945 * in that RAMBlock.
1947 * ptr: Host pointer to look up
1948 * round_offset: If true round the result offset down to a page boundary
1949 * *ram_addr: set to result ram_addr
1950 * *offset: set to result offset within the RAMBlock
1952 * Returns: RAMBlock (or NULL if not found)
1954 * By the time this function returns, the returned pointer is not protected
1955 * by RCU anymore. If the caller is not within an RCU critical section and
1956 * does not hold the iothread lock, it must have other means of protecting the
1957 * pointer, such as a reference to the region that includes the incoming
1958 * ram_addr_t.
1960 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1961 ram_addr_t *ram_addr,
1962 ram_addr_t *offset)
1964 RAMBlock *block;
1965 uint8_t *host = ptr;
1967 if (xen_enabled()) {
1968 rcu_read_lock();
1969 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1970 block = qemu_get_ram_block(*ram_addr);
1971 if (block) {
1972 *offset = (host - block->host);
1974 rcu_read_unlock();
1975 return block;
1978 rcu_read_lock();
1979 block = atomic_rcu_read(&ram_list.mru_block);
1980 if (block && block->host && host - block->host < block->max_length) {
1981 goto found;
1984 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1985 /* This case append when the block is not mapped. */
1986 if (block->host == NULL) {
1987 continue;
1989 if (host - block->host < block->max_length) {
1990 goto found;
1994 rcu_read_unlock();
1995 return NULL;
1997 found:
1998 *offset = (host - block->host);
1999 if (round_offset) {
2000 *offset &= TARGET_PAGE_MASK;
2002 *ram_addr = block->offset + *offset;
2003 rcu_read_unlock();
2004 return block;
2008 * Finds the named RAMBlock
2010 * name: The name of RAMBlock to find
2012 * Returns: RAMBlock (or NULL if not found)
2014 RAMBlock *qemu_ram_block_by_name(const char *name)
2016 RAMBlock *block;
2018 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2019 if (!strcmp(name, block->idstr)) {
2020 return block;
2024 return NULL;
2027 /* Some of the softmmu routines need to translate from a host pointer
2028 (typically a TLB entry) back to a ram offset. */
2029 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2031 RAMBlock *block;
2032 ram_addr_t offset; /* Not used */
2034 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2036 if (!block) {
2037 return NULL;
2040 return block->mr;
2043 /* Called within RCU critical section. */
2044 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2045 uint64_t val, unsigned size)
2047 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2048 tb_invalidate_phys_page_fast(ram_addr, size);
2050 switch (size) {
2051 case 1:
2052 stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2053 break;
2054 case 2:
2055 stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2056 break;
2057 case 4:
2058 stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2059 break;
2060 default:
2061 abort();
2063 /* Set both VGA and migration bits for simplicity and to remove
2064 * the notdirty callback faster.
2066 cpu_physical_memory_set_dirty_range(ram_addr, size,
2067 DIRTY_CLIENTS_NOCODE);
2068 /* we remove the notdirty callback only if the code has been
2069 flushed */
2070 if (!cpu_physical_memory_is_clean(ram_addr)) {
2071 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2075 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2076 unsigned size, bool is_write)
2078 return is_write;
2081 static const MemoryRegionOps notdirty_mem_ops = {
2082 .write = notdirty_mem_write,
2083 .valid.accepts = notdirty_mem_accepts,
2084 .endianness = DEVICE_NATIVE_ENDIAN,
2087 /* Generate a debug exception if a watchpoint has been hit. */
2088 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2090 CPUState *cpu = current_cpu;
2091 CPUClass *cc = CPU_GET_CLASS(cpu);
2092 CPUArchState *env = cpu->env_ptr;
2093 target_ulong pc, cs_base;
2094 target_ulong vaddr;
2095 CPUWatchpoint *wp;
2096 uint32_t cpu_flags;
2098 if (cpu->watchpoint_hit) {
2099 /* We re-entered the check after replacing the TB. Now raise
2100 * the debug interrupt so that is will trigger after the
2101 * current instruction. */
2102 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2103 return;
2105 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2106 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2107 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2108 && (wp->flags & flags)) {
2109 if (flags == BP_MEM_READ) {
2110 wp->flags |= BP_WATCHPOINT_HIT_READ;
2111 } else {
2112 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2114 wp->hitaddr = vaddr;
2115 wp->hitattrs = attrs;
2116 if (!cpu->watchpoint_hit) {
2117 if (wp->flags & BP_CPU &&
2118 !cc->debug_check_watchpoint(cpu, wp)) {
2119 wp->flags &= ~BP_WATCHPOINT_HIT;
2120 continue;
2122 cpu->watchpoint_hit = wp;
2123 tb_check_watchpoint(cpu);
2124 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2125 cpu->exception_index = EXCP_DEBUG;
2126 cpu_loop_exit(cpu);
2127 } else {
2128 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2129 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2130 cpu_resume_from_signal(cpu, NULL);
2133 } else {
2134 wp->flags &= ~BP_WATCHPOINT_HIT;
2139 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2140 so these check for a hit then pass through to the normal out-of-line
2141 phys routines. */
2142 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2143 unsigned size, MemTxAttrs attrs)
2145 MemTxResult res;
2146 uint64_t data;
2147 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2148 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2150 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2151 switch (size) {
2152 case 1:
2153 data = address_space_ldub(as, addr, attrs, &res);
2154 break;
2155 case 2:
2156 data = address_space_lduw(as, addr, attrs, &res);
2157 break;
2158 case 4:
2159 data = address_space_ldl(as, addr, attrs, &res);
2160 break;
2161 default: abort();
2163 *pdata = data;
2164 return res;
2167 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2168 uint64_t val, unsigned size,
2169 MemTxAttrs attrs)
2171 MemTxResult res;
2172 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2173 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2175 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2176 switch (size) {
2177 case 1:
2178 address_space_stb(as, addr, val, attrs, &res);
2179 break;
2180 case 2:
2181 address_space_stw(as, addr, val, attrs, &res);
2182 break;
2183 case 4:
2184 address_space_stl(as, addr, val, attrs, &res);
2185 break;
2186 default: abort();
2188 return res;
2191 static const MemoryRegionOps watch_mem_ops = {
2192 .read_with_attrs = watch_mem_read,
2193 .write_with_attrs = watch_mem_write,
2194 .endianness = DEVICE_NATIVE_ENDIAN,
2197 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2198 unsigned len, MemTxAttrs attrs)
2200 subpage_t *subpage = opaque;
2201 uint8_t buf[8];
2202 MemTxResult res;
2204 #if defined(DEBUG_SUBPAGE)
2205 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2206 subpage, len, addr);
2207 #endif
2208 res = address_space_read(subpage->as, addr + subpage->base,
2209 attrs, buf, len);
2210 if (res) {
2211 return res;
2213 switch (len) {
2214 case 1:
2215 *data = ldub_p(buf);
2216 return MEMTX_OK;
2217 case 2:
2218 *data = lduw_p(buf);
2219 return MEMTX_OK;
2220 case 4:
2221 *data = ldl_p(buf);
2222 return MEMTX_OK;
2223 case 8:
2224 *data = ldq_p(buf);
2225 return MEMTX_OK;
2226 default:
2227 abort();
2231 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2232 uint64_t value, unsigned len, MemTxAttrs attrs)
2234 subpage_t *subpage = opaque;
2235 uint8_t buf[8];
2237 #if defined(DEBUG_SUBPAGE)
2238 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2239 " value %"PRIx64"\n",
2240 __func__, subpage, len, addr, value);
2241 #endif
2242 switch (len) {
2243 case 1:
2244 stb_p(buf, value);
2245 break;
2246 case 2:
2247 stw_p(buf, value);
2248 break;
2249 case 4:
2250 stl_p(buf, value);
2251 break;
2252 case 8:
2253 stq_p(buf, value);
2254 break;
2255 default:
2256 abort();
2258 return address_space_write(subpage->as, addr + subpage->base,
2259 attrs, buf, len);
2262 static bool subpage_accepts(void *opaque, hwaddr addr,
2263 unsigned len, bool is_write)
2265 subpage_t *subpage = opaque;
2266 #if defined(DEBUG_SUBPAGE)
2267 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2268 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2269 #endif
2271 return address_space_access_valid(subpage->as, addr + subpage->base,
2272 len, is_write);
2275 static const MemoryRegionOps subpage_ops = {
2276 .read_with_attrs = subpage_read,
2277 .write_with_attrs = subpage_write,
2278 .impl.min_access_size = 1,
2279 .impl.max_access_size = 8,
2280 .valid.min_access_size = 1,
2281 .valid.max_access_size = 8,
2282 .valid.accepts = subpage_accepts,
2283 .endianness = DEVICE_NATIVE_ENDIAN,
2286 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2287 uint16_t section)
2289 int idx, eidx;
2291 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2292 return -1;
2293 idx = SUBPAGE_IDX(start);
2294 eidx = SUBPAGE_IDX(end);
2295 #if defined(DEBUG_SUBPAGE)
2296 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2297 __func__, mmio, start, end, idx, eidx, section);
2298 #endif
2299 for (; idx <= eidx; idx++) {
2300 mmio->sub_section[idx] = section;
2303 return 0;
2306 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2308 subpage_t *mmio;
2310 mmio = g_malloc0(sizeof(subpage_t));
2312 mmio->as = as;
2313 mmio->base = base;
2314 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2315 NULL, TARGET_PAGE_SIZE);
2316 mmio->iomem.subpage = true;
2317 #if defined(DEBUG_SUBPAGE)
2318 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2319 mmio, base, TARGET_PAGE_SIZE);
2320 #endif
2321 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2323 return mmio;
2326 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2327 MemoryRegion *mr)
2329 assert(as);
2330 MemoryRegionSection section = {
2331 .address_space = as,
2332 .mr = mr,
2333 .offset_within_address_space = 0,
2334 .offset_within_region = 0,
2335 .size = int128_2_64(),
2338 return phys_section_add(map, &section);
2341 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2343 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2344 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2345 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2346 MemoryRegionSection *sections = d->map.sections;
2348 return sections[index & ~TARGET_PAGE_MASK].mr;
2351 static void io_mem_init(void)
2353 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2354 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2355 NULL, UINT64_MAX);
2356 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2357 NULL, UINT64_MAX);
2358 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2359 NULL, UINT64_MAX);
2362 static void mem_begin(MemoryListener *listener)
2364 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2365 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2366 uint16_t n;
2368 n = dummy_section(&d->map, as, &io_mem_unassigned);
2369 assert(n == PHYS_SECTION_UNASSIGNED);
2370 n = dummy_section(&d->map, as, &io_mem_notdirty);
2371 assert(n == PHYS_SECTION_NOTDIRTY);
2372 n = dummy_section(&d->map, as, &io_mem_rom);
2373 assert(n == PHYS_SECTION_ROM);
2374 n = dummy_section(&d->map, as, &io_mem_watch);
2375 assert(n == PHYS_SECTION_WATCH);
2377 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2378 d->as = as;
2379 as->next_dispatch = d;
2382 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2384 phys_sections_free(&d->map);
2385 g_free(d);
2388 static void mem_commit(MemoryListener *listener)
2390 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2391 AddressSpaceDispatch *cur = as->dispatch;
2392 AddressSpaceDispatch *next = as->next_dispatch;
2394 phys_page_compact_all(next, next->map.nodes_nb);
2396 atomic_rcu_set(&as->dispatch, next);
2397 if (cur) {
2398 call_rcu(cur, address_space_dispatch_free, rcu);
2402 static void tcg_commit(MemoryListener *listener)
2404 CPUAddressSpace *cpuas;
2405 AddressSpaceDispatch *d;
2407 /* since each CPU stores ram addresses in its TLB cache, we must
2408 reset the modified entries */
2409 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2410 cpu_reloading_memory_map();
2411 /* The CPU and TLB are protected by the iothread lock.
2412 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2413 * may have split the RCU critical section.
2415 d = atomic_rcu_read(&cpuas->as->dispatch);
2416 cpuas->memory_dispatch = d;
2417 tlb_flush(cpuas->cpu, 1);
2420 void address_space_init_dispatch(AddressSpace *as)
2422 as->dispatch = NULL;
2423 as->dispatch_listener = (MemoryListener) {
2424 .begin = mem_begin,
2425 .commit = mem_commit,
2426 .region_add = mem_add,
2427 .region_nop = mem_add,
2428 .priority = 0,
2430 memory_listener_register(&as->dispatch_listener, as);
2433 void address_space_unregister(AddressSpace *as)
2435 memory_listener_unregister(&as->dispatch_listener);
2438 void address_space_destroy_dispatch(AddressSpace *as)
2440 AddressSpaceDispatch *d = as->dispatch;
2442 atomic_rcu_set(&as->dispatch, NULL);
2443 if (d) {
2444 call_rcu(d, address_space_dispatch_free, rcu);
2448 static void memory_map_init(void)
2450 system_memory = g_malloc(sizeof(*system_memory));
2452 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2453 address_space_init(&address_space_memory, system_memory, "memory");
2455 system_io = g_malloc(sizeof(*system_io));
2456 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2457 65536);
2458 address_space_init(&address_space_io, system_io, "I/O");
2461 MemoryRegion *get_system_memory(void)
2463 return system_memory;
2466 MemoryRegion *get_system_io(void)
2468 return system_io;
2471 #endif /* !defined(CONFIG_USER_ONLY) */
2473 /* physical memory access (slow version, mainly for debug) */
2474 #if defined(CONFIG_USER_ONLY)
2475 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2476 uint8_t *buf, int len, int is_write)
2478 int l, flags;
2479 target_ulong page;
2480 void * p;
2482 while (len > 0) {
2483 page = addr & TARGET_PAGE_MASK;
2484 l = (page + TARGET_PAGE_SIZE) - addr;
2485 if (l > len)
2486 l = len;
2487 flags = page_get_flags(page);
2488 if (!(flags & PAGE_VALID))
2489 return -1;
2490 if (is_write) {
2491 if (!(flags & PAGE_WRITE))
2492 return -1;
2493 /* XXX: this code should not depend on lock_user */
2494 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2495 return -1;
2496 memcpy(p, buf, l);
2497 unlock_user(p, addr, l);
2498 } else {
2499 if (!(flags & PAGE_READ))
2500 return -1;
2501 /* XXX: this code should not depend on lock_user */
2502 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2503 return -1;
2504 memcpy(buf, p, l);
2505 unlock_user(p, addr, 0);
2507 len -= l;
2508 buf += l;
2509 addr += l;
2511 return 0;
2514 #else
2516 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2517 hwaddr length)
2519 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2520 /* No early return if dirty_log_mask is or becomes 0, because
2521 * cpu_physical_memory_set_dirty_range will still call
2522 * xen_modified_memory.
2524 if (dirty_log_mask) {
2525 dirty_log_mask =
2526 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2528 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2529 tb_invalidate_phys_range(addr, addr + length);
2530 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2532 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2535 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2537 unsigned access_size_max = mr->ops->valid.max_access_size;
2539 /* Regions are assumed to support 1-4 byte accesses unless
2540 otherwise specified. */
2541 if (access_size_max == 0) {
2542 access_size_max = 4;
2545 /* Bound the maximum access by the alignment of the address. */
2546 if (!mr->ops->impl.unaligned) {
2547 unsigned align_size_max = addr & -addr;
2548 if (align_size_max != 0 && align_size_max < access_size_max) {
2549 access_size_max = align_size_max;
2553 /* Don't attempt accesses larger than the maximum. */
2554 if (l > access_size_max) {
2555 l = access_size_max;
2557 l = pow2floor(l);
2559 return l;
2562 static bool prepare_mmio_access(MemoryRegion *mr)
2564 bool unlocked = !qemu_mutex_iothread_locked();
2565 bool release_lock = false;
2567 if (unlocked && mr->global_locking) {
2568 qemu_mutex_lock_iothread();
2569 unlocked = false;
2570 release_lock = true;
2572 if (mr->flush_coalesced_mmio) {
2573 if (unlocked) {
2574 qemu_mutex_lock_iothread();
2576 qemu_flush_coalesced_mmio_buffer();
2577 if (unlocked) {
2578 qemu_mutex_unlock_iothread();
2582 return release_lock;
2585 /* Called within RCU critical section. */
2586 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2587 MemTxAttrs attrs,
2588 const uint8_t *buf,
2589 int len, hwaddr addr1,
2590 hwaddr l, MemoryRegion *mr)
2592 uint8_t *ptr;
2593 uint64_t val;
2594 MemTxResult result = MEMTX_OK;
2595 bool release_lock = false;
2597 for (;;) {
2598 if (!memory_access_is_direct(mr, true)) {
2599 release_lock |= prepare_mmio_access(mr);
2600 l = memory_access_size(mr, l, addr1);
2601 /* XXX: could force current_cpu to NULL to avoid
2602 potential bugs */
2603 switch (l) {
2604 case 8:
2605 /* 64 bit write access */
2606 val = ldq_p(buf);
2607 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2608 attrs);
2609 break;
2610 case 4:
2611 /* 32 bit write access */
2612 val = ldl_p(buf);
2613 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2614 attrs);
2615 break;
2616 case 2:
2617 /* 16 bit write access */
2618 val = lduw_p(buf);
2619 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2620 attrs);
2621 break;
2622 case 1:
2623 /* 8 bit write access */
2624 val = ldub_p(buf);
2625 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2626 attrs);
2627 break;
2628 default:
2629 abort();
2631 } else {
2632 addr1 += memory_region_get_ram_addr(mr);
2633 /* RAM case */
2634 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2635 memcpy(ptr, buf, l);
2636 invalidate_and_set_dirty(mr, addr1, l);
2639 if (release_lock) {
2640 qemu_mutex_unlock_iothread();
2641 release_lock = false;
2644 len -= l;
2645 buf += l;
2646 addr += l;
2648 if (!len) {
2649 break;
2652 l = len;
2653 mr = address_space_translate(as, addr, &addr1, &l, true);
2656 return result;
2659 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2660 const uint8_t *buf, int len)
2662 hwaddr l;
2663 hwaddr addr1;
2664 MemoryRegion *mr;
2665 MemTxResult result = MEMTX_OK;
2667 if (len > 0) {
2668 rcu_read_lock();
2669 l = len;
2670 mr = address_space_translate(as, addr, &addr1, &l, true);
2671 result = address_space_write_continue(as, addr, attrs, buf, len,
2672 addr1, l, mr);
2673 rcu_read_unlock();
2676 return result;
2679 /* Called within RCU critical section. */
2680 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2681 MemTxAttrs attrs, uint8_t *buf,
2682 int len, hwaddr addr1, hwaddr l,
2683 MemoryRegion *mr)
2685 uint8_t *ptr;
2686 uint64_t val;
2687 MemTxResult result = MEMTX_OK;
2688 bool release_lock = false;
2690 for (;;) {
2691 if (!memory_access_is_direct(mr, false)) {
2692 /* I/O case */
2693 release_lock |= prepare_mmio_access(mr);
2694 l = memory_access_size(mr, l, addr1);
2695 switch (l) {
2696 case 8:
2697 /* 64 bit read access */
2698 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2699 attrs);
2700 stq_p(buf, val);
2701 break;
2702 case 4:
2703 /* 32 bit read access */
2704 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2705 attrs);
2706 stl_p(buf, val);
2707 break;
2708 case 2:
2709 /* 16 bit read access */
2710 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2711 attrs);
2712 stw_p(buf, val);
2713 break;
2714 case 1:
2715 /* 8 bit read access */
2716 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2717 attrs);
2718 stb_p(buf, val);
2719 break;
2720 default:
2721 abort();
2723 } else {
2724 /* RAM case */
2725 ptr = qemu_get_ram_ptr(mr->ram_block,
2726 memory_region_get_ram_addr(mr) + addr1);
2727 memcpy(buf, ptr, l);
2730 if (release_lock) {
2731 qemu_mutex_unlock_iothread();
2732 release_lock = false;
2735 len -= l;
2736 buf += l;
2737 addr += l;
2739 if (!len) {
2740 break;
2743 l = len;
2744 mr = address_space_translate(as, addr, &addr1, &l, false);
2747 return result;
2750 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2751 MemTxAttrs attrs, uint8_t *buf, int len)
2753 hwaddr l;
2754 hwaddr addr1;
2755 MemoryRegion *mr;
2756 MemTxResult result = MEMTX_OK;
2758 if (len > 0) {
2759 rcu_read_lock();
2760 l = len;
2761 mr = address_space_translate(as, addr, &addr1, &l, false);
2762 result = address_space_read_continue(as, addr, attrs, buf, len,
2763 addr1, l, mr);
2764 rcu_read_unlock();
2767 return result;
2770 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2771 uint8_t *buf, int len, bool is_write)
2773 if (is_write) {
2774 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2775 } else {
2776 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2780 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2781 int len, int is_write)
2783 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2784 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into ROM/RAM and mark it dirty */
    FLUSH_CACHE,    /* only flush the host icache for the range */
};
2792 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2793 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2795 hwaddr l;
2796 uint8_t *ptr;
2797 hwaddr addr1;
2798 MemoryRegion *mr;
2800 rcu_read_lock();
2801 while (len > 0) {
2802 l = len;
2803 mr = address_space_translate(as, addr, &addr1, &l, true);
2805 if (!(memory_region_is_ram(mr) ||
2806 memory_region_is_romd(mr))) {
2807 l = memory_access_size(mr, l, addr1);
2808 } else {
2809 addr1 += memory_region_get_ram_addr(mr);
2810 /* ROM/RAM case */
2811 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2812 switch (type) {
2813 case WRITE_DATA:
2814 memcpy(ptr, buf, l);
2815 invalidate_and_set_dirty(mr, addr1, l);
2816 break;
2817 case FLUSH_CACHE:
2818 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2819 break;
2822 len -= l;
2823 buf += l;
2824 addr += l;
2826 rcu_read_unlock();
2829 /* used for ROM loading : can write in RAM and ROM */
2830 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2831 const uint8_t *buf, int len)
2833 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2836 void cpu_flush_icache_range(hwaddr start, int len)
2839 * This function should do the same thing as an icache flush that was
2840 * triggered from within the guest. For TCG we are always cache coherent,
2841 * so there is no need to flush anything. For KVM / Xen we need to flush
2842 * the host's instruction cache at least.
2844 if (tcg_enabled()) {
2845 return;
2848 cpu_physical_memory_write_rom_internal(&address_space_memory,
2849 start, NULL, len, FLUSH_CACHE);
2852 typedef struct {
2853 MemoryRegion *mr;
2854 void *buffer;
2855 hwaddr addr;
2856 hwaddr len;
2857 bool in_use;
2858 } BounceBuffer;
2860 static BounceBuffer bounce;
2862 typedef struct MapClient {
2863 QEMUBH *bh;
2864 QLIST_ENTRY(MapClient) link;
2865 } MapClient;
2867 QemuMutex map_client_list_lock;
2868 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2869 = QLIST_HEAD_INITIALIZER(map_client_list);
2871 static void cpu_unregister_map_client_do(MapClient *client)
2873 QLIST_REMOVE(client, link);
2874 g_free(client);
2877 static void cpu_notify_map_clients_locked(void)
2879 MapClient *client;
2881 while (!QLIST_EMPTY(&map_client_list)) {
2882 client = QLIST_FIRST(&map_client_list);
2883 qemu_bh_schedule(client->bh);
2884 cpu_unregister_map_client_do(client);
2888 void cpu_register_map_client(QEMUBH *bh)
2890 MapClient *client = g_malloc(sizeof(*client));
2892 qemu_mutex_lock(&map_client_list_lock);
2893 client->bh = bh;
2894 QLIST_INSERT_HEAD(&map_client_list, client, link);
2895 if (!atomic_read(&bounce.in_use)) {
2896 cpu_notify_map_clients_locked();
2898 qemu_mutex_unlock(&map_client_list_lock);
2901 void cpu_exec_init_all(void)
2903 qemu_mutex_init(&ram_list.mutex);
2904 io_mem_init();
2905 memory_map_init();
2906 qemu_mutex_init(&map_client_list_lock);
2909 void cpu_unregister_map_client(QEMUBH *bh)
2911 MapClient *client;
2913 qemu_mutex_lock(&map_client_list_lock);
2914 QLIST_FOREACH(client, &map_client_list, link) {
2915 if (client->bh == bh) {
2916 cpu_unregister_map_client_do(client);
2917 break;
2920 qemu_mutex_unlock(&map_client_list_lock);
2923 static void cpu_notify_map_clients(void)
2925 qemu_mutex_lock(&map_client_list_lock);
2926 cpu_notify_map_clients_locked();
2927 qemu_mutex_unlock(&map_client_list_lock);
2930 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2932 MemoryRegion *mr;
2933 hwaddr l, xlat;
2935 rcu_read_lock();
2936 while (len > 0) {
2937 l = len;
2938 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2939 if (!memory_access_is_direct(mr, is_write)) {
2940 l = memory_access_size(mr, l, addr);
2941 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2942 return false;
2946 len -= l;
2947 addr += l;
2949 rcu_read_unlock();
2950 return true;
2953 /* Map a physical memory region into a host virtual address.
2954 * May map a subset of the requested range, given by and returned in *plen.
2955 * May return NULL if resources needed to perform the mapping are exhausted.
2956 * Use only for reads OR writes - not for read-modify-write operations.
2957 * Use cpu_register_map_client() to know when retrying the map operation is
2958 * likely to succeed.
2960 void *address_space_map(AddressSpace *as,
2961 hwaddr addr,
2962 hwaddr *plen,
2963 bool is_write)
2965 hwaddr len = *plen;
2966 hwaddr done = 0;
2967 hwaddr l, xlat, base;
2968 MemoryRegion *mr, *this_mr;
2969 ram_addr_t raddr;
2970 void *ptr;
2972 if (len == 0) {
2973 return NULL;
2976 l = len;
2977 rcu_read_lock();
2978 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2980 if (!memory_access_is_direct(mr, is_write)) {
2981 if (atomic_xchg(&bounce.in_use, true)) {
2982 rcu_read_unlock();
2983 return NULL;
2985 /* Avoid unbounded allocations */
2986 l = MIN(l, TARGET_PAGE_SIZE);
2987 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2988 bounce.addr = addr;
2989 bounce.len = l;
2991 memory_region_ref(mr);
2992 bounce.mr = mr;
2993 if (!is_write) {
2994 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2995 bounce.buffer, l);
2998 rcu_read_unlock();
2999 *plen = l;
3000 return bounce.buffer;
3003 base = xlat;
3004 raddr = memory_region_get_ram_addr(mr);
3006 for (;;) {
3007 len -= l;
3008 addr += l;
3009 done += l;
3010 if (len == 0) {
3011 break;
3014 l = len;
3015 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3016 if (this_mr != mr || xlat != base + done) {
3017 break;
3021 memory_region_ref(mr);
3022 *plen = done;
3023 ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3024 rcu_read_unlock();
3026 return ptr;
3029 /* Unmaps a memory region previously mapped by address_space_map().
3030 * Will also mark the memory as dirty if is_write == 1. access_len gives
3031 * the amount of memory that was actually read or written by the caller.
3033 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3034 int is_write, hwaddr access_len)
3036 if (buffer != bounce.buffer) {
3037 MemoryRegion *mr;
3038 ram_addr_t addr1;
3040 mr = qemu_ram_addr_from_host(buffer, &addr1);
3041 assert(mr != NULL);
3042 if (is_write) {
3043 invalidate_and_set_dirty(mr, addr1, access_len);
3045 if (xen_enabled()) {
3046 xen_invalidate_map_cache_entry(buffer);
3048 memory_region_unref(mr);
3049 return;
3051 if (is_write) {
3052 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3053 bounce.buffer, access_len);
3055 qemu_vfree(bounce.buffer);
3056 bounce.buffer = NULL;
3057 memory_region_unref(bounce.mr);
3058 atomic_mb_set(&bounce.in_use, false);
3059 cpu_notify_map_clients();
3062 void *cpu_physical_memory_map(hwaddr addr,
3063 hwaddr *plen,
3064 int is_write)
3066 return address_space_map(&address_space_memory, addr, plen, is_write);
3069 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3070 int is_write, hwaddr access_len)
3072 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3075 /* warning: addr must be aligned */
3076 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3077 MemTxAttrs attrs,
3078 MemTxResult *result,
3079 enum device_endian endian)
3081 uint8_t *ptr;
3082 uint64_t val;
3083 MemoryRegion *mr;
3084 hwaddr l = 4;
3085 hwaddr addr1;
3086 MemTxResult r;
3087 bool release_lock = false;
3089 rcu_read_lock();
3090 mr = address_space_translate(as, addr, &addr1, &l, false);
3091 if (l < 4 || !memory_access_is_direct(mr, false)) {
3092 release_lock |= prepare_mmio_access(mr);
3094 /* I/O case */
3095 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3096 #if defined(TARGET_WORDS_BIGENDIAN)
3097 if (endian == DEVICE_LITTLE_ENDIAN) {
3098 val = bswap32(val);
3100 #else
3101 if (endian == DEVICE_BIG_ENDIAN) {
3102 val = bswap32(val);
3104 #endif
3105 } else {
3106 /* RAM case */
3107 ptr = qemu_get_ram_ptr(mr->ram_block,
3108 memory_region_get_ram_addr(mr) + addr1);
3109 switch (endian) {
3110 case DEVICE_LITTLE_ENDIAN:
3111 val = ldl_le_p(ptr);
3112 break;
3113 case DEVICE_BIG_ENDIAN:
3114 val = ldl_be_p(ptr);
3115 break;
3116 default:
3117 val = ldl_p(ptr);
3118 break;
3120 r = MEMTX_OK;
3122 if (result) {
3123 *result = r;
3125 if (release_lock) {
3126 qemu_mutex_unlock_iothread();
3128 rcu_read_unlock();
3129 return val;
3132 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3133 MemTxAttrs attrs, MemTxResult *result)
3135 return address_space_ldl_internal(as, addr, attrs, result,
3136 DEVICE_NATIVE_ENDIAN);
3139 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3140 MemTxAttrs attrs, MemTxResult *result)
3142 return address_space_ldl_internal(as, addr, attrs, result,
3143 DEVICE_LITTLE_ENDIAN);
3146 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3147 MemTxAttrs attrs, MemTxResult *result)
3149 return address_space_ldl_internal(as, addr, attrs, result,
3150 DEVICE_BIG_ENDIAN);
3153 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3155 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3158 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3160 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3163 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3165 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3168 /* warning: addr must be aligned */
3169 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3170 MemTxAttrs attrs,
3171 MemTxResult *result,
3172 enum device_endian endian)
3174 uint8_t *ptr;
3175 uint64_t val;
3176 MemoryRegion *mr;
3177 hwaddr l = 8;
3178 hwaddr addr1;
3179 MemTxResult r;
3180 bool release_lock = false;
3182 rcu_read_lock();
3183 mr = address_space_translate(as, addr, &addr1, &l,
3184 false);
3185 if (l < 8 || !memory_access_is_direct(mr, false)) {
3186 release_lock |= prepare_mmio_access(mr);
3188 /* I/O case */
3189 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3190 #if defined(TARGET_WORDS_BIGENDIAN)
3191 if (endian == DEVICE_LITTLE_ENDIAN) {
3192 val = bswap64(val);
3194 #else
3195 if (endian == DEVICE_BIG_ENDIAN) {
3196 val = bswap64(val);
3198 #endif
3199 } else {
3200 /* RAM case */
3201 ptr = qemu_get_ram_ptr(mr->ram_block,
3202 memory_region_get_ram_addr(mr) + addr1);
3203 switch (endian) {
3204 case DEVICE_LITTLE_ENDIAN:
3205 val = ldq_le_p(ptr);
3206 break;
3207 case DEVICE_BIG_ENDIAN:
3208 val = ldq_be_p(ptr);
3209 break;
3210 default:
3211 val = ldq_p(ptr);
3212 break;
3214 r = MEMTX_OK;
3216 if (result) {
3217 *result = r;
3219 if (release_lock) {
3220 qemu_mutex_unlock_iothread();
3222 rcu_read_unlock();
3223 return val;
3226 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3227 MemTxAttrs attrs, MemTxResult *result)
3229 return address_space_ldq_internal(as, addr, attrs, result,
3230 DEVICE_NATIVE_ENDIAN);
3233 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3234 MemTxAttrs attrs, MemTxResult *result)
3236 return address_space_ldq_internal(as, addr, attrs, result,
3237 DEVICE_LITTLE_ENDIAN);
3240 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3241 MemTxAttrs attrs, MemTxResult *result)
3243 return address_space_ldq_internal(as, addr, attrs, result,
3244 DEVICE_BIG_ENDIAN);
3247 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3249 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3252 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3254 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3257 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3259 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3262 /* XXX: optimize */
3263 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3264 MemTxAttrs attrs, MemTxResult *result)
3266 uint8_t val;
3267 MemTxResult r;
3269 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3270 if (result) {
3271 *result = r;
3273 return val;
3276 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3278 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3281 /* warning: addr must be aligned */
3282 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3283 hwaddr addr,
3284 MemTxAttrs attrs,
3285 MemTxResult *result,
3286 enum device_endian endian)
3288 uint8_t *ptr;
3289 uint64_t val;
3290 MemoryRegion *mr;
3291 hwaddr l = 2;
3292 hwaddr addr1;
3293 MemTxResult r;
3294 bool release_lock = false;
3296 rcu_read_lock();
3297 mr = address_space_translate(as, addr, &addr1, &l,
3298 false);
3299 if (l < 2 || !memory_access_is_direct(mr, false)) {
3300 release_lock |= prepare_mmio_access(mr);
3302 /* I/O case */
3303 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3304 #if defined(TARGET_WORDS_BIGENDIAN)
3305 if (endian == DEVICE_LITTLE_ENDIAN) {
3306 val = bswap16(val);
3308 #else
3309 if (endian == DEVICE_BIG_ENDIAN) {
3310 val = bswap16(val);
3312 #endif
3313 } else {
3314 /* RAM case */
3315 ptr = qemu_get_ram_ptr(mr->ram_block,
3316 memory_region_get_ram_addr(mr) + addr1);
3317 switch (endian) {
3318 case DEVICE_LITTLE_ENDIAN:
3319 val = lduw_le_p(ptr);
3320 break;
3321 case DEVICE_BIG_ENDIAN:
3322 val = lduw_be_p(ptr);
3323 break;
3324 default:
3325 val = lduw_p(ptr);
3326 break;
3328 r = MEMTX_OK;
3330 if (result) {
3331 *result = r;
3333 if (release_lock) {
3334 qemu_mutex_unlock_iothread();
3336 rcu_read_unlock();
3337 return val;
3340 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3341 MemTxAttrs attrs, MemTxResult *result)
3343 return address_space_lduw_internal(as, addr, attrs, result,
3344 DEVICE_NATIVE_ENDIAN);
3347 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3348 MemTxAttrs attrs, MemTxResult *result)
3350 return address_space_lduw_internal(as, addr, attrs, result,
3351 DEVICE_LITTLE_ENDIAN);
3354 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3355 MemTxAttrs attrs, MemTxResult *result)
3357 return address_space_lduw_internal(as, addr, attrs, result,
3358 DEVICE_BIG_ENDIAN);
3361 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3363 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3366 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3368 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3371 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3373 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3376 /* warning: addr must be aligned. The ram page is not masked as dirty
3377 and the code inside is not invalidated. It is useful if the dirty
3378 bits are used to track modified PTEs */
3379 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3380 MemTxAttrs attrs, MemTxResult *result)
3382 uint8_t *ptr;
3383 MemoryRegion *mr;
3384 hwaddr l = 4;
3385 hwaddr addr1;
3386 MemTxResult r;
3387 uint8_t dirty_log_mask;
3388 bool release_lock = false;
3390 rcu_read_lock();
3391 mr = address_space_translate(as, addr, &addr1, &l,
3392 true);
3393 if (l < 4 || !memory_access_is_direct(mr, true)) {
3394 release_lock |= prepare_mmio_access(mr);
3396 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3397 } else {
3398 addr1 += memory_region_get_ram_addr(mr);
3399 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3400 stl_p(ptr, val);
3402 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3403 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3404 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3405 r = MEMTX_OK;
3407 if (result) {
3408 *result = r;
3410 if (release_lock) {
3411 qemu_mutex_unlock_iothread();
3413 rcu_read_unlock();
3416 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3418 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3421 /* warning: addr must be aligned */
3422 static inline void address_space_stl_internal(AddressSpace *as,
3423 hwaddr addr, uint32_t val,
3424 MemTxAttrs attrs,
3425 MemTxResult *result,
3426 enum device_endian endian)
3428 uint8_t *ptr;
3429 MemoryRegion *mr;
3430 hwaddr l = 4;
3431 hwaddr addr1;
3432 MemTxResult r;
3433 bool release_lock = false;
3435 rcu_read_lock();
3436 mr = address_space_translate(as, addr, &addr1, &l,
3437 true);
3438 if (l < 4 || !memory_access_is_direct(mr, true)) {
3439 release_lock |= prepare_mmio_access(mr);
3441 #if defined(TARGET_WORDS_BIGENDIAN)
3442 if (endian == DEVICE_LITTLE_ENDIAN) {
3443 val = bswap32(val);
3445 #else
3446 if (endian == DEVICE_BIG_ENDIAN) {
3447 val = bswap32(val);
3449 #endif
3450 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3451 } else {
3452 /* RAM case */
3453 addr1 += memory_region_get_ram_addr(mr);
3454 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3455 switch (endian) {
3456 case DEVICE_LITTLE_ENDIAN:
3457 stl_le_p(ptr, val);
3458 break;
3459 case DEVICE_BIG_ENDIAN:
3460 stl_be_p(ptr, val);
3461 break;
3462 default:
3463 stl_p(ptr, val);
3464 break;
3466 invalidate_and_set_dirty(mr, addr1, 4);
3467 r = MEMTX_OK;
3469 if (result) {
3470 *result = r;
3472 if (release_lock) {
3473 qemu_mutex_unlock_iothread();
3475 rcu_read_unlock();
3478 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3479 MemTxAttrs attrs, MemTxResult *result)
3481 address_space_stl_internal(as, addr, val, attrs, result,
3482 DEVICE_NATIVE_ENDIAN);
3485 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3486 MemTxAttrs attrs, MemTxResult *result)
3488 address_space_stl_internal(as, addr, val, attrs, result,
3489 DEVICE_LITTLE_ENDIAN);
3492 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3493 MemTxAttrs attrs, MemTxResult *result)
3495 address_space_stl_internal(as, addr, val, attrs, result,
3496 DEVICE_BIG_ENDIAN);
3499 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3501 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3504 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3506 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3509 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3511 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3514 /* XXX: optimize */
3515 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3516 MemTxAttrs attrs, MemTxResult *result)
3518 uint8_t v = val;
3519 MemTxResult r;
3521 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3522 if (result) {
3523 *result = r;
3527 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3529 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3532 /* warning: addr must be aligned */
3533 static inline void address_space_stw_internal(AddressSpace *as,
3534 hwaddr addr, uint32_t val,
3535 MemTxAttrs attrs,
3536 MemTxResult *result,
3537 enum device_endian endian)
3539 uint8_t *ptr;
3540 MemoryRegion *mr;
3541 hwaddr l = 2;
3542 hwaddr addr1;
3543 MemTxResult r;
3544 bool release_lock = false;
3546 rcu_read_lock();
3547 mr = address_space_translate(as, addr, &addr1, &l, true);
3548 if (l < 2 || !memory_access_is_direct(mr, true)) {
3549 release_lock |= prepare_mmio_access(mr);
3551 #if defined(TARGET_WORDS_BIGENDIAN)
3552 if (endian == DEVICE_LITTLE_ENDIAN) {
3553 val = bswap16(val);
3555 #else
3556 if (endian == DEVICE_BIG_ENDIAN) {
3557 val = bswap16(val);
3559 #endif
3560 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3561 } else {
3562 /* RAM case */
3563 addr1 += memory_region_get_ram_addr(mr);
3564 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3565 switch (endian) {
3566 case DEVICE_LITTLE_ENDIAN:
3567 stw_le_p(ptr, val);
3568 break;
3569 case DEVICE_BIG_ENDIAN:
3570 stw_be_p(ptr, val);
3571 break;
3572 default:
3573 stw_p(ptr, val);
3574 break;
3576 invalidate_and_set_dirty(mr, addr1, 2);
3577 r = MEMTX_OK;
3579 if (result) {
3580 *result = r;
3582 if (release_lock) {
3583 qemu_mutex_unlock_iothread();
3585 rcu_read_unlock();
3588 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3589 MemTxAttrs attrs, MemTxResult *result)
3591 address_space_stw_internal(as, addr, val, attrs, result,
3592 DEVICE_NATIVE_ENDIAN);
3595 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3596 MemTxAttrs attrs, MemTxResult *result)
3598 address_space_stw_internal(as, addr, val, attrs, result,
3599 DEVICE_LITTLE_ENDIAN);
3602 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3603 MemTxAttrs attrs, MemTxResult *result)
3605 address_space_stw_internal(as, addr, val, attrs, result,
3606 DEVICE_BIG_ENDIAN);
3609 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3611 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3614 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3616 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3619 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3621 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3624 /* XXX: optimize */
3625 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3626 MemTxAttrs attrs, MemTxResult *result)
3628 MemTxResult r;
3629 val = tswap64(val);
3630 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3631 if (result) {
3632 *result = r;
3636 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3637 MemTxAttrs attrs, MemTxResult *result)
3639 MemTxResult r;
3640 val = cpu_to_le64(val);
3641 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3642 if (result) {
3643 *result = r;
3646 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3647 MemTxAttrs attrs, MemTxResult *result)
3649 MemTxResult r;
3650 val = cpu_to_be64(val);
3651 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3652 if (result) {
3653 *result = r;
3657 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3659 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3662 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3664 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3667 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3669 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3672 /* virtual memory access for debug (includes writing to ROM) */
3673 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3674 uint8_t *buf, int len, int is_write)
3676 int l;
3677 hwaddr phys_addr;
3678 target_ulong page;
3680 while (len > 0) {
3681 int asidx;
3682 MemTxAttrs attrs;
3684 page = addr & TARGET_PAGE_MASK;
3685 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3686 asidx = cpu_asidx_from_attrs(cpu, attrs);
3687 /* if no physical page mapped, return an error */
3688 if (phys_addr == -1)
3689 return -1;
3690 l = (page + TARGET_PAGE_SIZE) - addr;
3691 if (l > len)
3692 l = len;
3693 phys_addr += (addr & ~TARGET_PAGE_MASK);
3694 if (is_write) {
3695 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3696 phys_addr, buf, l);
3697 } else {
3698 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3699 MEMTXATTRS_UNSPECIFIED,
3700 buf, l, 0);
3702 len -= l;
3703 buf += l;
3704 addr += l;
3706 return 0;
3710 * Allows code that needs to deal with migration bitmaps etc to still be built
3711 * target independent.
3713 size_t qemu_target_page_bits(void)
3715 return TARGET_PAGE_BITS;
3718 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined, false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3734 #ifndef CONFIG_USER_ONLY
3735 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3737 MemoryRegion*mr;
3738 hwaddr l = 1;
3739 bool res;
3741 rcu_read_lock();
3742 mr = address_space_translate(&address_space_memory,
3743 phys_addr, &phys_addr, &l, false);
3745 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3746 rcu_read_unlock();
3747 return res;
3750 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3752 RAMBlock *block;
3753 int ret = 0;
3755 rcu_read_lock();
3756 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3757 ret = func(block->idstr, block->host, block->offset,
3758 block->used_length, opaque);
3759 if (ret) {
3760 break;
3763 rcu_read_unlock();
3764 return ret;
3766 #endif