vl: change runstate only if new state is different from current state
[qemu/cris-port.git] / exec.c
blob 59aed1782812953a4262fa23edcd24d58556d5dc
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #include <sys/mman.h>
23 #endif
25 #include "qemu/cutils.h"
26 #include "cpu.h"
27 #include "exec/exec-all.h"
28 #include "tcg.h"
29 #include "hw/qdev-core.h"
30 #if !defined(CONFIG_USER_ONLY)
31 #include "hw/boards.h"
32 #include "hw/xen/xen.h"
33 #endif
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include <qemu.h>
41 #else /* !CONFIG_USER_ONLY */
42 #include "hw/hw.h"
43 #include "exec/memory.h"
44 #include "exec/ioport.h"
45 #include "sysemu/dma.h"
46 #include "exec/address-spaces.h"
47 #include "sysemu/xen-mapcache.h"
48 #include "trace.h"
49 #endif
50 #include "exec/cpu-all.h"
51 #include "qemu/rcu_queue.h"
52 #include "qemu/main-loop.h"
53 #include "translate-all.h"
54 #include "sysemu/replay.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
58 #include "exec/log.h"
60 #include "qemu/range.h"
61 #ifndef _WIN32
62 #include "qemu/mmap-alloc.h"
63 #endif
65 //#define DEBUG_SUBPAGE
67 #if !defined(CONFIG_USER_ONLY)
68 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
69 * are protected by the ramlist lock.
71 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
73 static MemoryRegion *system_memory;
74 static MemoryRegion *system_io;
76 AddressSpace address_space_io;
77 AddressSpace address_space_memory;
79 MemoryRegion io_mem_rom, io_mem_notdirty;
80 static MemoryRegion io_mem_unassigned;
82 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
83 #define RAM_PREALLOC (1 << 0)
85 /* RAM is mmap-ed with MAP_SHARED */
86 #define RAM_SHARED (1 << 1)
88 /* Only a portion of RAM (used_length) is actually used, and migrated.
89 * This used_length size can change across reboots.
91 #define RAM_RESIZEABLE (1 << 2)
93 #endif
95 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
96 /* current CPU in the current thread. It is only valid inside
97 cpu_exec() */
98 __thread CPUState *current_cpu;
99 /* 0 = Do not count executed instructions.
100 1 = Precise instruction counting.
101 2 = Adaptive rate instruction counting. */
102 int use_icount;
104 #if !defined(CONFIG_USER_ONLY)
106 typedef struct PhysPageEntry PhysPageEntry;
108 struct PhysPageEntry {
109 /* How many levels to skip to reach the next node (each level covers P_L2_BITS bits). 0 for a leaf. */
110 uint32_t skip : 6;
111 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
112 uint32_t ptr : 26;
115 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
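/* Together these fields form the entries of a radix tree over physical page
 * numbers: a non-leaf entry (skip != 0) points into the nodes array and
 * records how many P_L2_BITS-wide levels a lookup may jump over, while a
 * leaf entry (skip == 0) points into the sections array.
 * PHYS_MAP_NODE_NIL marks a child that has not been allocated yet.
 */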
117 /* Size of the L2 (and L3, etc) page tables. */
118 #define ADDR_SPACE_BITS 64
120 #define P_L2_BITS 9
121 #define P_L2_SIZE (1 << P_L2_BITS)
123 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
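/* Example (assuming TARGET_PAGE_BITS == 12, i.e. 4 KiB target pages, purely
 * for illustration): P_L2_LEVELS = (64 - 12 - 1) / 9 + 1 = 6, and six levels
 * of 9 bits each are enough to cover the 52-bit physical page number.
 */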
125 typedef PhysPageEntry Node[P_L2_SIZE];
127 typedef struct PhysPageMap {
128 struct rcu_head rcu;
130 unsigned sections_nb;
131 unsigned sections_nb_alloc;
132 unsigned nodes_nb;
133 unsigned nodes_nb_alloc;
134 Node *nodes;
135 MemoryRegionSection *sections;
136 } PhysPageMap;
138 struct AddressSpaceDispatch {
139 struct rcu_head rcu;
141 MemoryRegionSection *mru_section;
142 /* This is a multi-level map on the physical address space.
143 * The bottom level has pointers to MemoryRegionSections.
145 PhysPageEntry phys_map;
146 PhysPageMap map;
147 AddressSpace *as;
150 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
151 typedef struct subpage_t {
152 MemoryRegion iomem;
153 AddressSpace *as;
154 hwaddr base;
155 uint16_t sub_section[TARGET_PAGE_SIZE];
156 } subpage_t;
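/* A subpage_t stands in for a target page that is covered by more than one
 * MemoryRegionSection: sub_section[] maps each byte offset within the page
 * (SUBPAGE_IDX) to a section index, and the iomem callbacks forward accesses
 * to subpage->as at base + addr.
 */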
158 #define PHYS_SECTION_UNASSIGNED 0
159 #define PHYS_SECTION_NOTDIRTY 1
160 #define PHYS_SECTION_ROM 2
161 #define PHYS_SECTION_WATCH 3
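/* Well-known section indexes used when building iotlb entries: for RAM pages,
 * memory_region_section_get_iotlb() ORs one of these into the page-aligned
 * ram address, which is why phys_section_add() asserts that section numbers
 * stay below TARGET_PAGE_SIZE.
 */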
163 static void io_mem_init(void);
164 static void memory_map_init(void);
165 static void tcg_commit(MemoryListener *listener);
167 static MemoryRegion io_mem_watch;
170 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
171 * @cpu: the CPU whose AddressSpace this is
172 * @as: the AddressSpace itself
173 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
174 * @tcg_as_listener: listener for tracking changes to the AddressSpace
176 struct CPUAddressSpace {
177 CPUState *cpu;
178 AddressSpace *as;
179 struct AddressSpaceDispatch *memory_dispatch;
180 MemoryListener tcg_as_listener;
183 #endif
185 #if !defined(CONFIG_USER_ONLY)
187 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
189 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
190 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
191 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
192 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
196 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
198 unsigned i;
199 uint32_t ret;
200 PhysPageEntry e;
201 PhysPageEntry *p;
203 ret = map->nodes_nb++;
204 p = map->nodes[ret];
205 assert(ret != PHYS_MAP_NODE_NIL);
206 assert(ret != map->nodes_nb_alloc);
208 e.skip = leaf ? 0 : 1;
209 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
210 for (i = 0; i < P_L2_SIZE; ++i) {
211 memcpy(&p[i], &e, sizeof(e));
213 return ret;
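/* phys_page_set_level() fills the range [*index, *index + *nb) with the
 * section number 'leaf', allocating intermediate nodes on demand.  An entry
 * is written directly at this level when the remaining range is aligned to,
 * and at least as large as, this level's step of (1 << (level * P_L2_BITS))
 * pages; otherwise the function recurses one level down.
 */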
216 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
217 hwaddr *index, hwaddr *nb, uint16_t leaf,
218 int level)
220 PhysPageEntry *p;
221 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
223 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
224 lp->ptr = phys_map_node_alloc(map, level == 0);
226 p = map->nodes[lp->ptr];
227 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
229 while (*nb && lp < &p[P_L2_SIZE]) {
230 if ((*index & (step - 1)) == 0 && *nb >= step) {
231 lp->skip = 0;
232 lp->ptr = leaf;
233 *index += step;
234 *nb -= step;
235 } else {
236 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
238 ++lp;
242 static void phys_page_set(AddressSpaceDispatch *d,
243 hwaddr index, hwaddr nb,
244 uint16_t leaf)
246 /* Wildly overreserve - it doesn't matter much. */
247 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
249 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
252 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
253 * and update our entry so we can skip it and go directly to the destination.
255 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
257 unsigned valid_ptr = P_L2_SIZE;
258 int valid = 0;
259 PhysPageEntry *p;
260 int i;
262 if (lp->ptr == PHYS_MAP_NODE_NIL) {
263 return;
266 p = nodes[lp->ptr];
267 for (i = 0; i < P_L2_SIZE; i++) {
268 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
269 continue;
272 valid_ptr = i;
273 valid++;
274 if (p[i].skip) {
275 phys_page_compact(&p[i], nodes, compacted);
279 /* We can only compress if there's only one child. */
280 if (valid != 1) {
281 return;
284 assert(valid_ptr < P_L2_SIZE);
286 /* Don't compress if it won't fit in the # of bits we have. */
287 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
288 return;
291 lp->ptr = p[valid_ptr].ptr;
292 if (!p[valid_ptr].skip) {
293 /* If our only child is a leaf, make this a leaf. */
294 /* By design, we should have made this node a leaf to begin with so we
295 * should never reach here.
296 * But since it's so simple to handle this, let's do it just in case we
297 * change this rule.
299 lp->skip = 0;
300 } else {
301 lp->skip += p[valid_ptr].skip;
305 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
307 DECLARE_BITMAP(compacted, nodes_nb);
309 if (d->phys_map.skip) {
310 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
314 static inline bool section_covers_addr(const MemoryRegionSection *section,
315 hwaddr addr)
317 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
318 * the section must cover the entire address space.
320 return section->size.hi ||
321 range_covers_byte(section->offset_within_address_space,
322 section->size.lo, addr);
325 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
326 Node *nodes, MemoryRegionSection *sections)
328 PhysPageEntry *p;
329 hwaddr index = addr >> TARGET_PAGE_BITS;
330 int i;
332 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
333 if (lp.ptr == PHYS_MAP_NODE_NIL) {
334 return &sections[PHYS_SECTION_UNASSIGNED];
336 p = nodes[lp.ptr];
337 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
340 if (section_covers_addr(&sections[lp.ptr], addr)) {
341 return &sections[lp.ptr];
342 } else {
343 return &sections[PHYS_SECTION_UNASSIGNED];
347 bool memory_region_is_unassigned(MemoryRegion *mr)
349 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
350 && mr != &io_mem_watch;
353 /* Called from RCU critical section */
354 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
355 hwaddr addr,
356 bool resolve_subpage)
358 MemoryRegionSection *section = atomic_read(&d->mru_section);
359 subpage_t *subpage;
360 bool update;
362 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
363 section_covers_addr(section, addr)) {
364 update = false;
365 } else {
366 section = phys_page_find(d->phys_map, addr, d->map.nodes,
367 d->map.sections);
368 update = true;
370 if (resolve_subpage && section->mr->subpage) {
371 subpage = container_of(section->mr, subpage_t, iomem);
372 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
374 if (update) {
375 atomic_set(&d->mru_section, section);
377 return section;
380 /* Called from RCU critical section */
381 static MemoryRegionSection *
382 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
383 hwaddr *plen, bool resolve_subpage)
385 MemoryRegionSection *section;
386 MemoryRegion *mr;
387 Int128 diff;
389 section = address_space_lookup_region(d, addr, resolve_subpage);
390 /* Compute offset within MemoryRegionSection */
391 addr -= section->offset_within_address_space;
393 /* Compute offset within MemoryRegion */
394 *xlat = addr + section->offset_within_region;
396 mr = section->mr;
398 /* MMIO registers can be expected to perform full-width accesses based only
399 * on their address, without considering adjacent registers that could
400 * decode to completely different MemoryRegions. When such registers
401 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
402 * regions overlap wildly. For this reason we cannot clamp the accesses
403 * here.
405 * If the length is small (as is the case for address_space_ldl/stl),
406 * everything works fine. If the incoming length is large, however,
407 * the caller really has to do the clamping through memory_access_size.
409 if (memory_region_is_ram(mr)) {
410 diff = int128_sub(section->size, int128_make64(addr));
411 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
413 return section;
416 /* Called from RCU critical section */
417 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
418 hwaddr *xlat, hwaddr *plen,
419 bool is_write)
421 IOMMUTLBEntry iotlb;
422 MemoryRegionSection *section;
423 MemoryRegion *mr;
425 for (;;) {
426 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
427 section = address_space_translate_internal(d, addr, &addr, plen, true);
428 mr = section->mr;
430 if (!mr->iommu_ops) {
431 break;
434 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
435 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
436 | (addr & iotlb.addr_mask));
437 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
438 if (!(iotlb.perm & (1 << is_write))) {
439 mr = &io_mem_unassigned;
440 break;
443 as = iotlb.target_as;
446 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
447 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
448 *plen = MIN(page, *plen);
451 *xlat = addr;
452 return mr;
455 /* Called from RCU critical section */
456 MemoryRegionSection *
457 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
458 hwaddr *xlat, hwaddr *plen)
460 MemoryRegionSection *section;
461 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
463 section = address_space_translate_internal(d, addr, xlat, plen, false);
465 assert(!section->mr->iommu_ops);
466 return section;
468 #endif
470 #if !defined(CONFIG_USER_ONLY)
472 static int cpu_common_post_load(void *opaque, int version_id)
474 CPUState *cpu = opaque;
476 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
477 version_id is increased. */
478 cpu->interrupt_request &= ~0x01;
479 tlb_flush(cpu, 1);
481 return 0;
484 static int cpu_common_pre_load(void *opaque)
486 CPUState *cpu = opaque;
488 cpu->exception_index = -1;
490 return 0;
493 static bool cpu_common_exception_index_needed(void *opaque)
495 CPUState *cpu = opaque;
497 return tcg_enabled() && cpu->exception_index != -1;
500 static const VMStateDescription vmstate_cpu_common_exception_index = {
501 .name = "cpu_common/exception_index",
502 .version_id = 1,
503 .minimum_version_id = 1,
504 .needed = cpu_common_exception_index_needed,
505 .fields = (VMStateField[]) {
506 VMSTATE_INT32(exception_index, CPUState),
507 VMSTATE_END_OF_LIST()
511 static bool cpu_common_crash_occurred_needed(void *opaque)
513 CPUState *cpu = opaque;
515 return cpu->crash_occurred;
518 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
519 .name = "cpu_common/crash_occurred",
520 .version_id = 1,
521 .minimum_version_id = 1,
522 .needed = cpu_common_crash_occurred_needed,
523 .fields = (VMStateField[]) {
524 VMSTATE_BOOL(crash_occurred, CPUState),
525 VMSTATE_END_OF_LIST()
529 const VMStateDescription vmstate_cpu_common = {
530 .name = "cpu_common",
531 .version_id = 1,
532 .minimum_version_id = 1,
533 .pre_load = cpu_common_pre_load,
534 .post_load = cpu_common_post_load,
535 .fields = (VMStateField[]) {
536 VMSTATE_UINT32(halted, CPUState),
537 VMSTATE_UINT32(interrupt_request, CPUState),
538 VMSTATE_END_OF_LIST()
540 .subsections = (const VMStateDescription*[]) {
541 &vmstate_cpu_common_exception_index,
542 &vmstate_cpu_common_crash_occurred,
543 NULL
547 #endif
549 CPUState *qemu_get_cpu(int index)
551 CPUState *cpu;
553 CPU_FOREACH(cpu) {
554 if (cpu->cpu_index == index) {
555 return cpu;
559 return NULL;
562 #if !defined(CONFIG_USER_ONLY)
563 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
565 CPUAddressSpace *newas;
567 /* Target code should have set num_ases before calling us */
568 assert(asidx < cpu->num_ases);
570 if (asidx == 0) {
571 /* address space 0 gets the convenience alias */
572 cpu->as = as;
575 /* KVM cannot currently support multiple address spaces. */
576 assert(asidx == 0 || !kvm_enabled());
578 if (!cpu->cpu_ases) {
579 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
582 newas = &cpu->cpu_ases[asidx];
583 newas->cpu = cpu;
584 newas->as = as;
585 if (tcg_enabled()) {
586 newas->tcg_as_listener.commit = tcg_commit;
587 memory_listener_register(&newas->tcg_as_listener, as);
591 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
593 /* Return the AddressSpace corresponding to the specified index */
594 return cpu->cpu_ases[asidx].as;
596 #endif
598 #ifndef CONFIG_USER_ONLY
599 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
601 static int cpu_get_free_index(Error **errp)
603 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
605 if (cpu >= MAX_CPUMASK_BITS) {
606 error_setg(errp, "Trying to use more CPUs than max of %d",
607 MAX_CPUMASK_BITS);
608 return -1;
611 bitmap_set(cpu_index_map, cpu, 1);
612 return cpu;
615 void cpu_exec_exit(CPUState *cpu)
617 if (cpu->cpu_index == -1) {
618 /* cpu_index was never allocated by this @cpu or was already freed. */
619 return;
622 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
623 cpu->cpu_index = -1;
625 #else
627 static int cpu_get_free_index(Error **errp)
629 CPUState *some_cpu;
630 int cpu_index = 0;
632 CPU_FOREACH(some_cpu) {
633 cpu_index++;
635 return cpu_index;
638 void cpu_exec_exit(CPUState *cpu)
641 #endif
643 void cpu_exec_init(CPUState *cpu, Error **errp)
645 CPUClass *cc = CPU_GET_CLASS(cpu);
646 Error *local_err = NULL;
648 cpu->as = NULL;
649 cpu->num_ases = 0;
651 #ifndef CONFIG_USER_ONLY
652 cpu->thread_id = qemu_get_thread_id();
654 /* This is a softmmu CPU object, so create a property for it
655 * so users can wire up its memory. (This can't go in qom/cpu.c
656 * because that file is compiled only once for both user-mode
657 * and system builds.) The default if no link is set up is to use
658 * the system address space.
660 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
661 (Object **)&cpu->memory,
662 qdev_prop_allow_set_link_before_realize,
663 OBJ_PROP_LINK_UNREF_ON_RELEASE,
664 &error_abort);
665 cpu->memory = system_memory;
666 object_ref(OBJECT(cpu->memory));
667 #endif
669 #if defined(CONFIG_USER_ONLY)
670 cpu_list_lock();
671 #endif
672 cpu->cpu_index = cpu_get_free_index(&local_err);
673 if (local_err) {
674 error_propagate(errp, local_err);
675 #if defined(CONFIG_USER_ONLY)
676 cpu_list_unlock();
677 #endif
678 return;
680 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
681 #if defined(CONFIG_USER_ONLY)
682 (void) cc;
683 cpu_list_unlock();
684 #else
685 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
686 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
688 if (cc->vmsd != NULL) {
689 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
691 #endif
694 #if defined(CONFIG_USER_ONLY)
695 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
697 tb_invalidate_phys_page_range(pc, pc + 1, 0);
699 #else
700 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
702 MemTxAttrs attrs;
703 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
704 int asidx = cpu_asidx_from_attrs(cpu, attrs);
705 if (phys != -1) {
706 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
707 phys | (pc & ~TARGET_PAGE_MASK));
710 #endif
712 #if defined(CONFIG_USER_ONLY)
713 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
718 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
719 int flags)
721 return -ENOSYS;
724 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
728 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
729 int flags, CPUWatchpoint **watchpoint)
731 return -ENOSYS;
733 #else
734 /* Add a watchpoint. */
735 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
736 int flags, CPUWatchpoint **watchpoint)
738 CPUWatchpoint *wp;
740 /* forbid ranges which are empty or run off the end of the address space */
741 if (len == 0 || (addr + len - 1) < addr) {
742 error_report("tried to set invalid watchpoint at %"
743 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
744 return -EINVAL;
746 wp = g_malloc(sizeof(*wp));
748 wp->vaddr = addr;
749 wp->len = len;
750 wp->flags = flags;
752 /* keep all GDB-injected watchpoints in front */
753 if (flags & BP_GDB) {
754 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
755 } else {
756 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
759 tlb_flush_page(cpu, addr);
761 if (watchpoint)
762 *watchpoint = wp;
763 return 0;
766 /* Remove a specific watchpoint. */
767 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
768 int flags)
770 CPUWatchpoint *wp;
772 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
773 if (addr == wp->vaddr && len == wp->len
774 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
775 cpu_watchpoint_remove_by_ref(cpu, wp);
776 return 0;
779 return -ENOENT;
782 /* Remove a specific watchpoint by reference. */
783 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
785 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
787 tlb_flush_page(cpu, watchpoint->vaddr);
789 g_free(watchpoint);
792 /* Remove all matching watchpoints. */
793 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
795 CPUWatchpoint *wp, *next;
797 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
798 if (wp->flags & mask) {
799 cpu_watchpoint_remove_by_ref(cpu, wp);
804 /* Return true if this watchpoint address matches the specified
805 * access (i.e. the address range covered by the watchpoint overlaps
806 * partially or completely with the address range covered by the
807 * access).
809 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
810 vaddr addr,
811 vaddr len)
813 /* We know the lengths are non-zero, but a little caution is
814 * required to avoid errors in the case where the range ends
815 * exactly at the top of the address space and so addr + len
816 * wraps round to zero.
818 vaddr wpend = wp->vaddr + wp->len - 1;
819 vaddr addrend = addr + len - 1;
821 return !(addr > wpend || wp->vaddr > addrend);
824 #endif
826 /* Add a breakpoint. */
827 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
828 CPUBreakpoint **breakpoint)
830 CPUBreakpoint *bp;
832 bp = g_malloc(sizeof(*bp));
834 bp->pc = pc;
835 bp->flags = flags;
837 /* keep all GDB-injected breakpoints in front */
838 if (flags & BP_GDB) {
839 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
840 } else {
841 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
844 breakpoint_invalidate(cpu, pc);
846 if (breakpoint) {
847 *breakpoint = bp;
849 return 0;
852 /* Remove a specific breakpoint. */
853 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
855 CPUBreakpoint *bp;
857 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
858 if (bp->pc == pc && bp->flags == flags) {
859 cpu_breakpoint_remove_by_ref(cpu, bp);
860 return 0;
863 return -ENOENT;
866 /* Remove a specific breakpoint by reference. */
867 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
869 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
871 breakpoint_invalidate(cpu, breakpoint->pc);
873 g_free(breakpoint);
876 /* Remove all matching breakpoints. */
877 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
879 CPUBreakpoint *bp, *next;
881 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
882 if (bp->flags & mask) {
883 cpu_breakpoint_remove_by_ref(cpu, bp);
888 /* enable or disable single step mode. EXCP_DEBUG is returned by the
889 CPU loop after each instruction */
890 void cpu_single_step(CPUState *cpu, int enabled)
892 if (cpu->singlestep_enabled != enabled) {
893 cpu->singlestep_enabled = enabled;
894 if (kvm_enabled()) {
895 kvm_update_guest_debug(cpu, 0);
896 } else {
897 /* must flush all the translated code to avoid inconsistencies */
898 /* XXX: only flush what is necessary */
899 tb_flush(cpu);
904 void cpu_abort(CPUState *cpu, const char *fmt, ...)
906 va_list ap;
907 va_list ap2;
909 va_start(ap, fmt);
910 va_copy(ap2, ap);
911 fprintf(stderr, "qemu: fatal: ");
912 vfprintf(stderr, fmt, ap);
913 fprintf(stderr, "\n");
914 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
915 if (qemu_log_separate()) {
916 qemu_log("qemu: fatal: ");
917 qemu_log_vprintf(fmt, ap2);
918 qemu_log("\n");
919 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
920 qemu_log_flush();
921 qemu_log_close();
923 va_end(ap2);
924 va_end(ap);
925 replay_finish();
926 #if defined(CONFIG_USER_ONLY)
928 struct sigaction act;
929 sigfillset(&act.sa_mask);
930 act.sa_handler = SIG_DFL;
931 sigaction(SIGABRT, &act, NULL);
933 #endif
934 abort();
937 #if !defined(CONFIG_USER_ONLY)
938 /* Called from RCU critical section */
939 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
941 RAMBlock *block;
943 block = atomic_rcu_read(&ram_list.mru_block);
944 if (block && addr - block->offset < block->max_length) {
945 return block;
947 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
948 if (addr - block->offset < block->max_length) {
949 goto found;
953 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
954 abort();
956 found:
957 /* It is safe to write mru_block outside the iothread lock. This
958 * is what happens:
960 * mru_block = xxx
961 * rcu_read_unlock()
962 * xxx removed from list
963 * rcu_read_lock()
964 * read mru_block
965 * mru_block = NULL;
966 * call_rcu(reclaim_ramblock, xxx);
967 * rcu_read_unlock()
969 * atomic_rcu_set is not needed here. The block was already published
970 * when it was placed into the list. Here we're just making an extra
971 * copy of the pointer.
973 ram_list.mru_block = block;
974 return block;
977 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
979 CPUState *cpu;
980 ram_addr_t start1;
981 RAMBlock *block;
982 ram_addr_t end;
984 end = TARGET_PAGE_ALIGN(start + length);
985 start &= TARGET_PAGE_MASK;
987 rcu_read_lock();
988 block = qemu_get_ram_block(start);
989 assert(block == qemu_get_ram_block(end - 1));
990 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
991 CPU_FOREACH(cpu) {
992 tlb_reset_dirty(cpu, start1, length);
994 rcu_read_unlock();
997 /* Note: start and end must be within the same ram block. */
998 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
999 ram_addr_t length,
1000 unsigned client)
1002 DirtyMemoryBlocks *blocks;
1003 unsigned long end, page;
1004 bool dirty = false;
1006 if (length == 0) {
1007 return false;
1010 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1011 page = start >> TARGET_PAGE_BITS;
1013 rcu_read_lock();
1015 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
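        /* Dirty memory is tracked in fixed-size bitmap chunks; split the page
         * range into (block index, offset) pieces so each chunk's bitmap can
         * be tested and cleared atomically without copying the whole map.
         */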
1017 while (page < end) {
1018 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1019 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1020 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1022 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1023 offset, num);
1024 page += num;
1027 rcu_read_unlock();
1029 if (dirty && tcg_enabled()) {
1030 tlb_reset_dirty_range_all(start, length);
1033 return dirty;
1036 /* Called from RCU critical section */
1037 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1038 MemoryRegionSection *section,
1039 target_ulong vaddr,
1040 hwaddr paddr, hwaddr xlat,
1041 int prot,
1042 target_ulong *address)
1044 hwaddr iotlb;
1045 CPUWatchpoint *wp;
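    /* The value computed here ends up in the CPU TLB's iotlb field: for RAM
     * it is the ram address ORed with PHYS_SECTION_NOTDIRTY or
     * PHYS_SECTION_ROM, for other regions it is the section's index in the
     * dispatch map plus the in-page offset, and pages with watchpoints are
     * redirected to PHYS_SECTION_WATCH.
     */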
1047 if (memory_region_is_ram(section->mr)) {
1048 /* Normal RAM. */
1049 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1050 + xlat;
1051 if (!section->readonly) {
1052 iotlb |= PHYS_SECTION_NOTDIRTY;
1053 } else {
1054 iotlb |= PHYS_SECTION_ROM;
1056 } else {
1057 AddressSpaceDispatch *d;
1059 d = atomic_rcu_read(&section->address_space->dispatch);
1060 iotlb = section - d->map.sections;
1061 iotlb += xlat;
1064 /* Make accesses to pages with watchpoints go via the
1065 watchpoint trap routines. */
1066 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1067 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1068 /* Avoid trapping reads of pages with a write breakpoint. */
1069 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1070 iotlb = PHYS_SECTION_WATCH + paddr;
1071 *address |= TLB_MMIO;
1072 break;
1077 return iotlb;
1079 #endif /* defined(CONFIG_USER_ONLY) */
1081 #if !defined(CONFIG_USER_ONLY)
1083 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1084 uint16_t section);
1085 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1087 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1088 qemu_anon_ram_alloc;
1091 * Set a custom physical guest memory allocator.
1092 * Accelerators with unusual needs may need this. Hopefully, we can
1093 * get rid of it eventually.
1095 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1097 phys_mem_alloc = alloc;
1100 static uint16_t phys_section_add(PhysPageMap *map,
1101 MemoryRegionSection *section)
1103 /* The physical section number is ORed with a page-aligned
1104 * pointer to produce the iotlb entries. Thus it should
1105 * never overflow into the page-aligned value.
1107 assert(map->sections_nb < TARGET_PAGE_SIZE);
1109 if (map->sections_nb == map->sections_nb_alloc) {
1110 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1111 map->sections = g_renew(MemoryRegionSection, map->sections,
1112 map->sections_nb_alloc);
1114 map->sections[map->sections_nb] = *section;
1115 memory_region_ref(section->mr);
1116 return map->sections_nb++;
1119 static void phys_section_destroy(MemoryRegion *mr)
1121 bool have_sub_page = mr->subpage;
1123 memory_region_unref(mr);
1125 if (have_sub_page) {
1126 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1127 object_unref(OBJECT(&subpage->iomem));
1128 g_free(subpage);
1132 static void phys_sections_free(PhysPageMap *map)
1134 while (map->sections_nb > 0) {
1135 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1136 phys_section_destroy(section->mr);
1138 g_free(map->sections);
1139 g_free(map->nodes);
1142 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1144 subpage_t *subpage;
1145 hwaddr base = section->offset_within_address_space
1146 & TARGET_PAGE_MASK;
1147 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1148 d->map.nodes, d->map.sections);
1149 MemoryRegionSection subsection = {
1150 .offset_within_address_space = base,
1151 .size = int128_make64(TARGET_PAGE_SIZE),
1153 hwaddr start, end;
1155 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1157 if (!(existing->mr->subpage)) {
1158 subpage = subpage_init(d->as, base);
1159 subsection.address_space = d->as;
1160 subsection.mr = &subpage->iomem;
1161 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1162 phys_section_add(&d->map, &subsection));
1163 } else {
1164 subpage = container_of(existing->mr, subpage_t, iomem);
1166 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1167 end = start + int128_get64(section->size) - 1;
1168 subpage_register(subpage, start, end,
1169 phys_section_add(&d->map, section));
1173 static void register_multipage(AddressSpaceDispatch *d,
1174 MemoryRegionSection *section)
1176 hwaddr start_addr = section->offset_within_address_space;
1177 uint16_t section_index = phys_section_add(&d->map, section);
1178 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1179 TARGET_PAGE_BITS));
1181 assert(num_pages);
1182 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
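/* mem_add() below splits an incoming section on target-page boundaries: a
 * leading partial page is registered as a subpage, runs of whole pages go
 * through register_multipage(), and a trailing partial page is again
 * registered as a subpage.
 */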
1185 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1187 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1188 AddressSpaceDispatch *d = as->next_dispatch;
1189 MemoryRegionSection now = *section, remain = *section;
1190 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1192 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1193 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1194 - now.offset_within_address_space;
1196 now.size = int128_min(int128_make64(left), now.size);
1197 register_subpage(d, &now);
1198 } else {
1199 now.size = int128_zero();
1201 while (int128_ne(remain.size, now.size)) {
1202 remain.size = int128_sub(remain.size, now.size);
1203 remain.offset_within_address_space += int128_get64(now.size);
1204 remain.offset_within_region += int128_get64(now.size);
1205 now = remain;
1206 if (int128_lt(remain.size, page_size)) {
1207 register_subpage(d, &now);
1208 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1209 now.size = page_size;
1210 register_subpage(d, &now);
1211 } else {
1212 now.size = int128_and(now.size, int128_neg(page_size));
1213 register_multipage(d, &now);
1218 void qemu_flush_coalesced_mmio_buffer(void)
1220 if (kvm_enabled())
1221 kvm_flush_coalesced_mmio_buffer();
1224 void qemu_mutex_lock_ramlist(void)
1226 qemu_mutex_lock(&ram_list.mutex);
1229 void qemu_mutex_unlock_ramlist(void)
1231 qemu_mutex_unlock(&ram_list.mutex);
1234 #ifdef __linux__
1235 static void *file_ram_alloc(RAMBlock *block,
1236 ram_addr_t memory,
1237 const char *path,
1238 Error **errp)
1240 bool unlink_on_error = false;
1241 char *filename;
1242 char *sanitized_name;
1243 char *c;
1244 void *area;
1245 int fd = -1;
1246 int64_t page_size;
1248 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1249 error_setg(errp,
1250 "host lacks kvm mmu notifiers, -mem-path unsupported");
1251 return NULL;
1254 for (;;) {
1255 fd = open(path, O_RDWR);
1256 if (fd >= 0) {
1257 /* @path names an existing file, use it */
1258 break;
1260 if (errno == ENOENT) {
1261 /* @path names a file that doesn't exist, create it */
1262 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1263 if (fd >= 0) {
1264 unlink_on_error = true;
1265 break;
1267 } else if (errno == EISDIR) {
1268 /* @path names a directory, create a file there */
1269 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1270 sanitized_name = g_strdup(memory_region_name(block->mr));
1271 for (c = sanitized_name; *c != '\0'; c++) {
1272 if (*c == '/') {
1273 *c = '_';
1277 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1278 sanitized_name);
1279 g_free(sanitized_name);
1281 fd = mkstemp(filename);
1282 if (fd >= 0) {
1283 unlink(filename);
1284 g_free(filename);
1285 break;
1287 g_free(filename);
1289 if (errno != EEXIST && errno != EINTR) {
1290 error_setg_errno(errp, errno,
1291 "can't open backing store %s for guest RAM",
1292 path);
1293 goto error;
1296 * Try again on EINTR and EEXIST. The latter happens when
1297 * something else creates the file between our two open().
1301 page_size = qemu_fd_getpagesize(fd);
1302 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1304 if (memory < page_size) {
1305 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1306 "or larger than page size 0x%" PRIx64,
1307 memory, page_size);
1308 goto error;
1311 memory = ROUND_UP(memory, page_size);
1314 * ftruncate is not supported by hugetlbfs in older
1315 * hosts, so don't bother bailing out on errors.
1316 * If anything goes wrong with it under other filesystems,
1317 * mmap will fail.
1319 if (ftruncate(fd, memory)) {
1320 perror("ftruncate");
1323 area = qemu_ram_mmap(fd, memory, block->mr->align,
1324 block->flags & RAM_SHARED);
1325 if (area == MAP_FAILED) {
1326 error_setg_errno(errp, errno,
1327 "unable to map backing store for guest RAM");
1328 goto error;
1331 if (mem_prealloc) {
1332 os_mem_prealloc(fd, area, memory);
1335 block->fd = fd;
1336 return area;
1338 error:
1339 if (unlink_on_error) {
1340 unlink(path);
1342 if (fd != -1) {
1343 close(fd);
1345 return NULL;
1347 #endif
1349 /* Called with the ramlist lock held. */
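/* find_ram_offset() does a best-fit search: for every existing block it
 * computes the gap up to the nearest following block and returns the end of
 * the smallest gap that still fits 'size', keeping the ram address space
 * tightly packed.
 */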
1350 static ram_addr_t find_ram_offset(ram_addr_t size)
1352 RAMBlock *block, *next_block;
1353 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1355 assert(size != 0); /* it would hand out the same offset multiple times */
1357 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1358 return 0;
1361 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1362 ram_addr_t end, next = RAM_ADDR_MAX;
1364 end = block->offset + block->max_length;
1366 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1367 if (next_block->offset >= end) {
1368 next = MIN(next, next_block->offset);
1371 if (next - end >= size && next - end < mingap) {
1372 offset = end;
1373 mingap = next - end;
1377 if (offset == RAM_ADDR_MAX) {
1378 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1379 (uint64_t)size);
1380 abort();
1383 return offset;
1386 ram_addr_t last_ram_offset(void)
1388 RAMBlock *block;
1389 ram_addr_t last = 0;
1391 rcu_read_lock();
1392 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1393 last = MAX(last, block->offset + block->max_length);
1395 rcu_read_unlock();
1396 return last;
1399 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1401 int ret;
1403 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1404 if (!machine_dump_guest_core(current_machine)) {
1405 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1406 if (ret) {
1407 perror("qemu_madvise");
1408 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1409 "but dump_guest_core=off specified\n");
1414 /* Called within an RCU critical section, or while the ramlist lock
1415 * is held.
1417 static RAMBlock *find_ram_block(ram_addr_t addr)
1419 RAMBlock *block;
1421 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1422 if (block->offset == addr) {
1423 return block;
1427 return NULL;
1430 const char *qemu_ram_get_idstr(RAMBlock *rb)
1432 return rb->idstr;
1435 /* Called with iothread lock held. */
1436 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1438 RAMBlock *new_block, *block;
1440 rcu_read_lock();
1441 new_block = find_ram_block(addr);
1442 assert(new_block);
1443 assert(!new_block->idstr[0]);
1445 if (dev) {
1446 char *id = qdev_get_dev_path(dev);
1447 if (id) {
1448 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1449 g_free(id);
1452 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1454 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1455 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1456 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1457 new_block->idstr);
1458 abort();
1461 rcu_read_unlock();
1464 /* Called with iothread lock held. */
1465 void qemu_ram_unset_idstr(ram_addr_t addr)
1467 RAMBlock *block;
1469 /* FIXME: arch_init.c assumes that this is not called throughout
1470 * migration. Ignore the problem since hot-unplug during migration
1471 * does not work anyway.
1474 rcu_read_lock();
1475 block = find_ram_block(addr);
1476 if (block) {
1477 memset(block->idstr, 0, sizeof(block->idstr));
1479 rcu_read_unlock();
1482 static int memory_try_enable_merging(void *addr, size_t len)
1484 if (!machine_mem_merge(current_machine)) {
1485 /* disabled by the user */
1486 return 0;
1489 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1492 /* Only legal before the guest might have detected the memory size: e.g. on
1493 * incoming migration, or right after reset.
1495 * As the memory core doesn't know how memory is accessed, it is up to the
1496 * resize callback to update device state and/or add assertions to detect
1497 * misuse, if necessary.
1499 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1501 RAMBlock *block = find_ram_block(base);
1503 assert(block);
1505 newsize = HOST_PAGE_ALIGN(newsize);
1507 if (block->used_length == newsize) {
1508 return 0;
1511 if (!(block->flags & RAM_RESIZEABLE)) {
1512 error_setg_errno(errp, EINVAL,
1513 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1514 " in != 0x" RAM_ADDR_FMT, block->idstr,
1515 newsize, block->used_length);
1516 return -EINVAL;
1519 if (block->max_length < newsize) {
1520 error_setg_errno(errp, EINVAL,
1521 "Length too large: %s: 0x" RAM_ADDR_FMT
1522 " > 0x" RAM_ADDR_FMT, block->idstr,
1523 newsize, block->max_length);
1524 return -EINVAL;
1527 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1528 block->used_length = newsize;
1529 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1530 DIRTY_CLIENTS_ALL);
1531 memory_region_set_size(block->mr, newsize);
1532 if (block->resized) {
1533 block->resized(block->idstr, newsize, block->host);
1535 return 0;
1538 /* Called with ram_list.mutex held */
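/* Growing the ram address space may require more dirty-bitmap chunks; the
 * function below builds a new DirtyMemoryBlocks array that reuses the old
 * chunk pointers, publishes it with atomic_rcu_set(), and frees the old
 * array after a grace period so concurrent readers stay safe.
 */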
1539 static void dirty_memory_extend(ram_addr_t old_ram_size,
1540 ram_addr_t new_ram_size)
1542 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1543 DIRTY_MEMORY_BLOCK_SIZE);
1544 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1545 DIRTY_MEMORY_BLOCK_SIZE);
1546 int i;
1548 /* Only need to extend if block count increased */
1549 if (new_num_blocks <= old_num_blocks) {
1550 return;
1553 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1554 DirtyMemoryBlocks *old_blocks;
1555 DirtyMemoryBlocks *new_blocks;
1556 int j;
1558 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1559 new_blocks = g_malloc(sizeof(*new_blocks) +
1560 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1562 if (old_num_blocks) {
1563 memcpy(new_blocks->blocks, old_blocks->blocks,
1564 old_num_blocks * sizeof(old_blocks->blocks[0]));
1567 for (j = old_num_blocks; j < new_num_blocks; j++) {
1568 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1571 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1573 if (old_blocks) {
1574 g_free_rcu(old_blocks, rcu);
1579 static void ram_block_add(RAMBlock *new_block, Error **errp)
1581 RAMBlock *block;
1582 RAMBlock *last_block = NULL;
1583 ram_addr_t old_ram_size, new_ram_size;
1584 Error *err = NULL;
1586 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1588 qemu_mutex_lock_ramlist();
1589 new_block->offset = find_ram_offset(new_block->max_length);
1591 if (!new_block->host) {
1592 if (xen_enabled()) {
1593 xen_ram_alloc(new_block->offset, new_block->max_length,
1594 new_block->mr, &err);
1595 if (err) {
1596 error_propagate(errp, err);
1597 qemu_mutex_unlock_ramlist();
1598 return;
1600 } else {
1601 new_block->host = phys_mem_alloc(new_block->max_length,
1602 &new_block->mr->align);
1603 if (!new_block->host) {
1604 error_setg_errno(errp, errno,
1605 "cannot set up guest memory '%s'",
1606 memory_region_name(new_block->mr));
1607 qemu_mutex_unlock_ramlist();
1608 return;
1610 memory_try_enable_merging(new_block->host, new_block->max_length);
1614 new_ram_size = MAX(old_ram_size,
1615 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1616 if (new_ram_size > old_ram_size) {
1617 migration_bitmap_extend(old_ram_size, new_ram_size);
1618 dirty_memory_extend(old_ram_size, new_ram_size);
1620 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1621 * QLIST (which has an RCU-friendly variant) does not have insertion at
1622 * tail, so save the last element in last_block.
1624 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1625 last_block = block;
1626 if (block->max_length < new_block->max_length) {
1627 break;
1630 if (block) {
1631 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1632 } else if (last_block) {
1633 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1634 } else { /* list is empty */
1635 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1637 ram_list.mru_block = NULL;
1639 /* Write list before version */
1640 smp_wmb();
1641 ram_list.version++;
1642 qemu_mutex_unlock_ramlist();
1644 cpu_physical_memory_set_dirty_range(new_block->offset,
1645 new_block->used_length,
1646 DIRTY_CLIENTS_ALL);
1648 if (new_block->host) {
1649 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1650 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1651 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1652 if (kvm_enabled()) {
1653 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1658 #ifdef __linux__
1659 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1660 bool share, const char *mem_path,
1661 Error **errp)
1663 RAMBlock *new_block;
1664 Error *local_err = NULL;
1666 if (xen_enabled()) {
1667 error_setg(errp, "-mem-path not supported with Xen");
1668 return NULL;
1671 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1673 * file_ram_alloc() needs to allocate just like
1674 * phys_mem_alloc, but we haven't bothered to provide
1675 * a hook there.
1677 error_setg(errp,
1678 "-mem-path not supported with this accelerator");
1679 return NULL;
1682 size = HOST_PAGE_ALIGN(size);
1683 new_block = g_malloc0(sizeof(*new_block));
1684 new_block->mr = mr;
1685 new_block->used_length = size;
1686 new_block->max_length = size;
1687 new_block->flags = share ? RAM_SHARED : 0;
1688 new_block->host = file_ram_alloc(new_block, size,
1689 mem_path, errp);
1690 if (!new_block->host) {
1691 g_free(new_block);
1692 return NULL;
1695 ram_block_add(new_block, &local_err);
1696 if (local_err) {
1697 g_free(new_block);
1698 error_propagate(errp, local_err);
1699 return NULL;
1701 return new_block;
1703 #endif
1705 static
1706 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1707 void (*resized)(const char*,
1708 uint64_t length,
1709 void *host),
1710 void *host, bool resizeable,
1711 MemoryRegion *mr, Error **errp)
1713 RAMBlock *new_block;
1714 Error *local_err = NULL;
1716 size = HOST_PAGE_ALIGN(size);
1717 max_size = HOST_PAGE_ALIGN(max_size);
1718 new_block = g_malloc0(sizeof(*new_block));
1719 new_block->mr = mr;
1720 new_block->resized = resized;
1721 new_block->used_length = size;
1722 new_block->max_length = max_size;
1723 assert(max_size >= size);
1724 new_block->fd = -1;
1725 new_block->host = host;
1726 if (host) {
1727 new_block->flags |= RAM_PREALLOC;
1729 if (resizeable) {
1730 new_block->flags |= RAM_RESIZEABLE;
1732 ram_block_add(new_block, &local_err);
1733 if (local_err) {
1734 g_free(new_block);
1735 error_propagate(errp, local_err);
1736 return NULL;
1738 return new_block;
1741 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1742 MemoryRegion *mr, Error **errp)
1744 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1747 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1749 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1752 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1753 void (*resized)(const char*,
1754 uint64_t length,
1755 void *host),
1756 MemoryRegion *mr, Error **errp)
1758 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
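/* reclaim_ramblock() undoes whichever allocation path created the block:
 * preallocated memory is left alone, Xen mappings are invalidated,
 * file-backed memory is munmapped and its fd closed, and anonymous memory is
 * released with qemu_anon_ram_free().
 */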
1761 static void reclaim_ramblock(RAMBlock *block)
1763 if (block->flags & RAM_PREALLOC) {
1765 } else if (xen_enabled()) {
1766 xen_invalidate_map_cache_entry(block->host);
1767 #ifndef _WIN32
1768 } else if (block->fd >= 0) {
1769 qemu_ram_munmap(block->host, block->max_length);
1770 close(block->fd);
1771 #endif
1772 } else {
1773 qemu_anon_ram_free(block->host, block->max_length);
1775 g_free(block);
1778 void qemu_ram_free(RAMBlock *block)
1780 if (!block) {
1781 return;
1784 qemu_mutex_lock_ramlist();
1785 QLIST_REMOVE_RCU(block, next);
1786 ram_list.mru_block = NULL;
1787 /* Write list before version */
1788 smp_wmb();
1789 ram_list.version++;
1790 call_rcu(block, reclaim_ramblock, rcu);
1791 qemu_mutex_unlock_ramlist();
1794 #ifndef _WIN32
1795 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1797 RAMBlock *block;
1798 ram_addr_t offset;
1799 int flags;
1800 void *area, *vaddr;
1802 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1803 offset = addr - block->offset;
1804 if (offset < block->max_length) {
1805 vaddr = ramblock_ptr(block, offset);
1806 if (block->flags & RAM_PREALLOC) {
1808 } else if (xen_enabled()) {
1809 abort();
1810 } else {
1811 flags = MAP_FIXED;
1812 if (block->fd >= 0) {
1813 flags |= (block->flags & RAM_SHARED ?
1814 MAP_SHARED : MAP_PRIVATE);
1815 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1816 flags, block->fd, offset);
1817 } else {
1819 * Remap needs to match alloc. Accelerators that
1820 * set phys_mem_alloc never remap. If they did,
1821 * we'd need a remap hook here.
1823 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1825 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1826 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1827 flags, -1, 0);
1829 if (area != vaddr) {
1830 fprintf(stderr, "Could not remap addr: "
1831 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1832 length, addr);
1833 exit(1);
1835 memory_try_enable_merging(vaddr, length);
1836 qemu_ram_setup_dump(vaddr, length);
1841 #endif /* !_WIN32 */
1843 int qemu_get_ram_fd(ram_addr_t addr)
1845 RAMBlock *block;
1846 int fd;
1848 rcu_read_lock();
1849 block = qemu_get_ram_block(addr);
1850 fd = block->fd;
1851 rcu_read_unlock();
1852 return fd;
1855 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1857 RAMBlock *block;
1859 rcu_read_lock();
1860 block = qemu_get_ram_block(addr);
1861 block->fd = fd;
1862 rcu_read_unlock();
1865 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1867 RAMBlock *block;
1868 void *ptr;
1870 rcu_read_lock();
1871 block = qemu_get_ram_block(addr);
1872 ptr = ramblock_ptr(block, 0);
1873 rcu_read_unlock();
1874 return ptr;
1877 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1878 * This should not be used for general purpose DMA. Use address_space_map
1879 * or address_space_rw instead. For local memory (e.g. video ram) that the
1880 * device owns, use memory_region_get_ram_ptr.
1882 * Called within RCU critical section.
1884 void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1886 RAMBlock *block = ram_block;
1888 if (block == NULL) {
1889 block = qemu_get_ram_block(addr);
1892 if (xen_enabled() && block->host == NULL) {
1893 /* We need to check if the requested address is in the RAM
1894 * because we don't want to map the entire memory in QEMU.
1895 * In that case just map until the end of the page.
1897 if (block->offset == 0) {
1898 return xen_map_cache(addr, 0, 0);
1901 block->host = xen_map_cache(block->offset, block->max_length, 1);
1903 return ramblock_ptr(block, addr - block->offset);
1906 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1907 * but takes a size argument.
1909 * Called within RCU critical section.
1911 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1912 hwaddr *size)
1914 RAMBlock *block = ram_block;
1915 ram_addr_t offset_inside_block;
1916 if (*size == 0) {
1917 return NULL;
1920 if (block == NULL) {
1921 block = qemu_get_ram_block(addr);
1923 offset_inside_block = addr - block->offset;
1924 *size = MIN(*size, block->max_length - offset_inside_block);
1926 if (xen_enabled() && block->host == NULL) {
1927 /* We need to check if the requested address is in the RAM
1928 * because we don't want to map the entire memory in QEMU.
1929 * In that case just map the requested area.
1931 if (block->offset == 0) {
1932 return xen_map_cache(addr, *size, 1);
1935 block->host = xen_map_cache(block->offset, block->max_length, 1);
1938 return ramblock_ptr(block, offset_inside_block);
1942 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1943 * in that RAMBlock.
1945 * ptr: Host pointer to look up
1946 * round_offset: If true round the result offset down to a page boundary
1947 * *ram_addr: set to result ram_addr
1948 * *offset: set to result offset within the RAMBlock
1950 * Returns: RAMBlock (or NULL if not found)
1952 * By the time this function returns, the returned pointer is not protected
1953 * by RCU anymore. If the caller is not within an RCU critical section and
1954 * does not hold the iothread lock, it must have other means of protecting the
1955 * pointer, such as a reference to the region that includes the incoming
1956 * ram_addr_t.
1958 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1959 ram_addr_t *ram_addr,
1960 ram_addr_t *offset)
1962 RAMBlock *block;
1963 uint8_t *host = ptr;
1965 if (xen_enabled()) {
1966 rcu_read_lock();
1967 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1968 block = qemu_get_ram_block(*ram_addr);
1969 if (block) {
1970 *offset = (host - block->host);
1972 rcu_read_unlock();
1973 return block;
1976 rcu_read_lock();
1977 block = atomic_rcu_read(&ram_list.mru_block);
1978 if (block && block->host && host - block->host < block->max_length) {
1979 goto found;
1982 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1983 /* This case happens when the block is not mapped. */
1984 if (block->host == NULL) {
1985 continue;
1987 if (host - block->host < block->max_length) {
1988 goto found;
1992 rcu_read_unlock();
1993 return NULL;
1995 found:
1996 *offset = (host - block->host);
1997 if (round_offset) {
1998 *offset &= TARGET_PAGE_MASK;
2000 *ram_addr = block->offset + *offset;
2001 rcu_read_unlock();
2002 return block;
2006 * Finds the named RAMBlock
2008 * name: The name of RAMBlock to find
2010 * Returns: RAMBlock (or NULL if not found)
2012 RAMBlock *qemu_ram_block_by_name(const char *name)
2014 RAMBlock *block;
2016 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2017 if (!strcmp(name, block->idstr)) {
2018 return block;
2022 return NULL;
2025 /* Some of the softmmu routines need to translate from a host pointer
2026 (typically a TLB entry) back to a ram offset. */
2027 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2029 RAMBlock *block;
2030 ram_addr_t offset; /* Not used */
2032 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2034 if (!block) {
2035 return NULL;
2038 return block->mr;
2041 /* Called within RCU critical section. */
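/* Writes that go through the notdirty slow path land here: any translated
 * code on the page is invalidated first, the write is performed, the
 * VGA/migration dirty bits are set, and once the page is fully dirty the TLB
 * is switched back to the fast path with tlb_set_dirty().
 */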
2042 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2043 uint64_t val, unsigned size)
2045 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2046 tb_invalidate_phys_page_fast(ram_addr, size);
2048 switch (size) {
2049 case 1:
2050 stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2051 break;
2052 case 2:
2053 stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2054 break;
2055 case 4:
2056 stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2057 break;
2058 default:
2059 abort();
2061 /* Set both VGA and migration bits for simplicity and to remove
2062 * the notdirty callback faster.
2064 cpu_physical_memory_set_dirty_range(ram_addr, size,
2065 DIRTY_CLIENTS_NOCODE);
2066 /* we remove the notdirty callback only if the code has been
2067 flushed */
2068 if (!cpu_physical_memory_is_clean(ram_addr)) {
2069 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2073 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2074 unsigned size, bool is_write)
2076 return is_write;
2079 static const MemoryRegionOps notdirty_mem_ops = {
2080 .write = notdirty_mem_write,
2081 .valid.accepts = notdirty_mem_accepts,
2082 .endianness = DEVICE_NATIVE_ENDIAN,
2085 /* Generate a debug exception if a watchpoint has been hit. */
2086 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2088 CPUState *cpu = current_cpu;
2089 CPUClass *cc = CPU_GET_CLASS(cpu);
2090 CPUArchState *env = cpu->env_ptr;
2091 target_ulong pc, cs_base;
2092 target_ulong vaddr;
2093 CPUWatchpoint *wp;
2094 uint32_t cpu_flags;
2096 if (cpu->watchpoint_hit) {
2097 /* We re-entered the check after replacing the TB. Now raise
2098 * the debug interrupt so that it will trigger after the
2099 * current instruction. */
2100 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2101 return;
2103 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2104 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2105 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2106 && (wp->flags & flags)) {
2107 if (flags == BP_MEM_READ) {
2108 wp->flags |= BP_WATCHPOINT_HIT_READ;
2109 } else {
2110 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2112 wp->hitaddr = vaddr;
2113 wp->hitattrs = attrs;
2114 if (!cpu->watchpoint_hit) {
2115 if (wp->flags & BP_CPU &&
2116 !cc->debug_check_watchpoint(cpu, wp)) {
2117 wp->flags &= ~BP_WATCHPOINT_HIT;
2118 continue;
2120 cpu->watchpoint_hit = wp;
2121 tb_check_watchpoint(cpu);
2122 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2123 cpu->exception_index = EXCP_DEBUG;
2124 cpu_loop_exit(cpu);
2125 } else {
2126 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2127 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2128 cpu_resume_from_signal(cpu, NULL);
2131 } else {
2132 wp->flags &= ~BP_WATCHPOINT_HIT;
2137 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2138 so these check for a hit then pass through to the normal out-of-line
2139 phys routines. */
2140 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2141 unsigned size, MemTxAttrs attrs)
2143 MemTxResult res;
2144 uint64_t data;
2145 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2146 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2148 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2149 switch (size) {
2150 case 1:
2151 data = address_space_ldub(as, addr, attrs, &res);
2152 break;
2153 case 2:
2154 data = address_space_lduw(as, addr, attrs, &res);
2155 break;
2156 case 4:
2157 data = address_space_ldl(as, addr, attrs, &res);
2158 break;
2159 default: abort();
2161 *pdata = data;
2162 return res;
2165 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2166 uint64_t val, unsigned size,
2167 MemTxAttrs attrs)
2169 MemTxResult res;
2170 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2171 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2173 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2174 switch (size) {
2175 case 1:
2176 address_space_stb(as, addr, val, attrs, &res);
2177 break;
2178 case 2:
2179 address_space_stw(as, addr, val, attrs, &res);
2180 break;
2181 case 4:
2182 address_space_stl(as, addr, val, attrs, &res);
2183 break;
2184 default: abort();
2186 return res;
2189 static const MemoryRegionOps watch_mem_ops = {
2190 .read_with_attrs = watch_mem_read,
2191 .write_with_attrs = watch_mem_write,
2192 .endianness = DEVICE_NATIVE_ENDIAN,
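/* A minimal sketch of how the watchpoint machinery above is driven in
 * practice: a debugger front end (the gdbstub, for instance) inserts a
 * watchpoint on a guest virtual address, after which accesses to that page
 * are routed through watch_mem_read()/watch_mem_write().  The address,
 * length and function name used here are purely illustrative.
 */
static int example_insert_write_watchpoint(CPUState *cpu)
{
    /* Watch 4 bytes at guest virtual address 0x1000 for write accesses. */
    return cpu_watchpoint_insert(cpu, 0x1000, 4, BP_MEM_WRITE | BP_GDB, NULL);
}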
2195 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2196 unsigned len, MemTxAttrs attrs)
2198 subpage_t *subpage = opaque;
2199 uint8_t buf[8];
2200 MemTxResult res;
2202 #if defined(DEBUG_SUBPAGE)
2203 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2204 subpage, len, addr);
2205 #endif
2206 res = address_space_read(subpage->as, addr + subpage->base,
2207 attrs, buf, len);
2208 if (res) {
2209 return res;
2211 switch (len) {
2212 case 1:
2213 *data = ldub_p(buf);
2214 return MEMTX_OK;
2215 case 2:
2216 *data = lduw_p(buf);
2217 return MEMTX_OK;
2218 case 4:
2219 *data = ldl_p(buf);
2220 return MEMTX_OK;
2221 case 8:
2222 *data = ldq_p(buf);
2223 return MEMTX_OK;
2224 default:
2225 abort();
2229 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2230 uint64_t value, unsigned len, MemTxAttrs attrs)
2232 subpage_t *subpage = opaque;
2233 uint8_t buf[8];
2235 #if defined(DEBUG_SUBPAGE)
2236 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2237 " value %"PRIx64"\n",
2238 __func__, subpage, len, addr, value);
2239 #endif
2240 switch (len) {
2241 case 1:
2242 stb_p(buf, value);
2243 break;
2244 case 2:
2245 stw_p(buf, value);
2246 break;
2247 case 4:
2248 stl_p(buf, value);
2249 break;
2250 case 8:
2251 stq_p(buf, value);
2252 break;
2253 default:
2254 abort();
2256 return address_space_write(subpage->as, addr + subpage->base,
2257 attrs, buf, len);
2260 static bool subpage_accepts(void *opaque, hwaddr addr,
2261 unsigned len, bool is_write)
2263 subpage_t *subpage = opaque;
2264 #if defined(DEBUG_SUBPAGE)
2265 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2266 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2267 #endif
2269 return address_space_access_valid(subpage->as, addr + subpage->base,
2270 len, is_write);
2273 static const MemoryRegionOps subpage_ops = {
2274 .read_with_attrs = subpage_read,
2275 .write_with_attrs = subpage_write,
2276 .impl.min_access_size = 1,
2277 .impl.max_access_size = 8,
2278 .valid.min_access_size = 1,
2279 .valid.max_access_size = 8,
2280 .valid.accepts = subpage_accepts,
2281 .endianness = DEVICE_NATIVE_ENDIAN,
2284 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2285 uint16_t section)
2287 int idx, eidx;
2289 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2290 return -1;
2291 idx = SUBPAGE_IDX(start);
2292 eidx = SUBPAGE_IDX(end);
2293 #if defined(DEBUG_SUBPAGE)
2294 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2295 __func__, mmio, start, end, idx, eidx, section);
2296 #endif
2297 for (; idx <= eidx; idx++) {
2298 mmio->sub_section[idx] = section;
2301 return 0;
2304 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2306 subpage_t *mmio;
2308 mmio = g_malloc0(sizeof(subpage_t));
2310 mmio->as = as;
2311 mmio->base = base;
2312 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2313 NULL, TARGET_PAGE_SIZE);
2314 mmio->iomem.subpage = true;
2315 #if defined(DEBUG_SUBPAGE)
2316 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2317 mmio, base, TARGET_PAGE_SIZE);
2318 #endif
2319 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2321 return mmio;
2324 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2325 MemoryRegion *mr)
2327 assert(as);
2328 MemoryRegionSection section = {
2329 .address_space = as,
2330 .mr = mr,
2331 .offset_within_address_space = 0,
2332 .offset_within_region = 0,
2333 .size = int128_2_64(),
2336 return phys_section_add(map, &section);
2339 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2341 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2342 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2343 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2344 MemoryRegionSection *sections = d->map.sections;
2346 return sections[index & ~TARGET_PAGE_MASK].mr;
2349 static void io_mem_init(void)
2351 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2352 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2353 NULL, UINT64_MAX);
2354 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2355 NULL, UINT64_MAX);
2356 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2357 NULL, UINT64_MAX);
2360 static void mem_begin(MemoryListener *listener)
2362 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2363 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2364 uint16_t n;
2366 n = dummy_section(&d->map, as, &io_mem_unassigned);
2367 assert(n == PHYS_SECTION_UNASSIGNED);
2368 n = dummy_section(&d->map, as, &io_mem_notdirty);
2369 assert(n == PHYS_SECTION_NOTDIRTY);
2370 n = dummy_section(&d->map, as, &io_mem_rom);
2371 assert(n == PHYS_SECTION_ROM);
2372 n = dummy_section(&d->map, as, &io_mem_watch);
2373 assert(n == PHYS_SECTION_WATCH);
2375 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2376 d->as = as;
2377 as->next_dispatch = d;
2380 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2382 phys_sections_free(&d->map);
2383 g_free(d);
2386 static void mem_commit(MemoryListener *listener)
2388 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2389 AddressSpaceDispatch *cur = as->dispatch;
2390 AddressSpaceDispatch *next = as->next_dispatch;
2392 phys_page_compact_all(next, next->map.nodes_nb);
2394 atomic_rcu_set(&as->dispatch, next);
2395 if (cur) {
2396 call_rcu(cur, address_space_dispatch_free, rcu);
2400 static void tcg_commit(MemoryListener *listener)
2402 CPUAddressSpace *cpuas;
2403 AddressSpaceDispatch *d;
2405 /* since each CPU stores ram addresses in its TLB cache, we must
2406 reset the modified entries */
2407 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2408 cpu_reloading_memory_map();
2409 /* The CPU and TLB are protected by the iothread lock.
2410 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2411 * may have split the RCU critical section.
2413 d = atomic_rcu_read(&cpuas->as->dispatch);
2414 cpuas->memory_dispatch = d;
2415 tlb_flush(cpuas->cpu, 1);
2418 void address_space_init_dispatch(AddressSpace *as)
2420 as->dispatch = NULL;
2421 as->dispatch_listener = (MemoryListener) {
2422 .begin = mem_begin,
2423 .commit = mem_commit,
2424 .region_add = mem_add,
2425 .region_nop = mem_add,
2426 .priority = 0,
2428 memory_listener_register(&as->dispatch_listener, as);
2431 void address_space_unregister(AddressSpace *as)
2433 memory_listener_unregister(&as->dispatch_listener);
2436 void address_space_destroy_dispatch(AddressSpace *as)
2438 AddressSpaceDispatch *d = as->dispatch;
2440 atomic_rcu_set(&as->dispatch, NULL);
2441 if (d) {
2442 call_rcu(d, address_space_dispatch_free, rcu);
2446 static void memory_map_init(void)
2448 system_memory = g_malloc(sizeof(*system_memory));
2450 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2451 address_space_init(&address_space_memory, system_memory, "memory");
2453 system_io = g_malloc(sizeof(*system_io));
2454 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2455 65536);
2456 address_space_init(&address_space_io, system_io, "I/O");
2459 MemoryRegion *get_system_memory(void)
2461 return system_memory;
2464 MemoryRegion *get_system_io(void)
2466 return system_io;
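/* A brief sketch of how a board or device model typically consumes the two
 * accessors above: allocate a RAM region and map it into the memory tree
 * returned by get_system_memory().  The region name, size and base address
 * are illustrative only; error_fatal is assumed to come from qapi/error.h.
 */
static void example_map_ram_into_system_memory(void)
{
    MemoryRegion *ram = g_new0(MemoryRegion, 1);

    memory_region_init_ram(ram, NULL, "example.ram", 0x100000, &error_fatal);
    memory_region_add_subregion(get_system_memory(), 0x40000000, ram);
}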
2469 #endif /* !defined(CONFIG_USER_ONLY) */
2471 /* physical memory access (slow version, mainly for debug) */
2472 #if defined(CONFIG_USER_ONLY)
2473 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2474 uint8_t *buf, int len, int is_write)
2476 int l, flags;
2477 target_ulong page;
2478 void * p;
2480 while (len > 0) {
2481 page = addr & TARGET_PAGE_MASK;
2482 l = (page + TARGET_PAGE_SIZE) - addr;
2483 if (l > len)
2484 l = len;
2485 flags = page_get_flags(page);
2486 if (!(flags & PAGE_VALID))
2487 return -1;
2488 if (is_write) {
2489 if (!(flags & PAGE_WRITE))
2490 return -1;
2491 /* XXX: this code should not depend on lock_user */
2492 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2493 return -1;
2494 memcpy(p, buf, l);
2495 unlock_user(p, addr, l);
2496 } else {
2497 if (!(flags & PAGE_READ))
2498 return -1;
2499 /* XXX: this code should not depend on lock_user */
2500 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2501 return -1;
2502 memcpy(buf, p, l);
2503 unlock_user(p, addr, 0);
2505 len -= l;
2506 buf += l;
2507 addr += l;
2509 return 0;
2512 #else
2514 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2515 hwaddr length)
2517 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2518 /* No early return if dirty_log_mask is or becomes 0, because
2519 * cpu_physical_memory_set_dirty_range will still call
2520 * xen_modified_memory.
2522 if (dirty_log_mask) {
2523 dirty_log_mask =
2524 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2526 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2527 tb_invalidate_phys_range(addr, addr + length);
2528 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2530 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2533 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2535 unsigned access_size_max = mr->ops->valid.max_access_size;
2537 /* Regions are assumed to support 1-4 byte accesses unless
2538 otherwise specified. */
2539 if (access_size_max == 0) {
2540 access_size_max = 4;
2543 /* Bound the maximum access by the alignment of the address. */
2544 if (!mr->ops->impl.unaligned) {
2545 unsigned align_size_max = addr & -addr;
2546 if (align_size_max != 0 && align_size_max < access_size_max) {
2547 access_size_max = align_size_max;
2551 /* Don't attempt accesses larger than the maximum. */
2552 if (l > access_size_max) {
2553 l = access_size_max;
2555 l = pow2floor(l);
2557 return l;
2560 static bool prepare_mmio_access(MemoryRegion *mr)
2562 bool unlocked = !qemu_mutex_iothread_locked();
2563 bool release_lock = false;
2565 if (unlocked && mr->global_locking) {
2566 qemu_mutex_lock_iothread();
2567 unlocked = false;
2568 release_lock = true;
2570 if (mr->flush_coalesced_mmio) {
2571 if (unlocked) {
2572 qemu_mutex_lock_iothread();
2574 qemu_flush_coalesced_mmio_buffer();
2575 if (unlocked) {
2576 qemu_mutex_unlock_iothread();
2580 return release_lock;
2583 /* Called within RCU critical section. */
2584 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2585 MemTxAttrs attrs,
2586 const uint8_t *buf,
2587 int len, hwaddr addr1,
2588 hwaddr l, MemoryRegion *mr)
2590 uint8_t *ptr;
2591 uint64_t val;
2592 MemTxResult result = MEMTX_OK;
2593 bool release_lock = false;
2595 for (;;) {
2596 if (!memory_access_is_direct(mr, true)) {
2597 release_lock |= prepare_mmio_access(mr);
2598 l = memory_access_size(mr, l, addr1);
2599 /* XXX: could force current_cpu to NULL to avoid
2600 potential bugs */
2601 switch (l) {
2602 case 8:
2603 /* 64 bit write access */
2604 val = ldq_p(buf);
2605 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2606 attrs);
2607 break;
2608 case 4:
2609 /* 32 bit write access */
2610 val = ldl_p(buf);
2611 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2612 attrs);
2613 break;
2614 case 2:
2615 /* 16 bit write access */
2616 val = lduw_p(buf);
2617 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2618 attrs);
2619 break;
2620 case 1:
2621 /* 8 bit write access */
2622 val = ldub_p(buf);
2623 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2624 attrs);
2625 break;
2626 default:
2627 abort();
2629 } else {
2630 addr1 += memory_region_get_ram_addr(mr);
2631 /* RAM case */
2632 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2633 memcpy(ptr, buf, l);
2634 invalidate_and_set_dirty(mr, addr1, l);
2637 if (release_lock) {
2638 qemu_mutex_unlock_iothread();
2639 release_lock = false;
2642 len -= l;
2643 buf += l;
2644 addr += l;
2646 if (!len) {
2647 break;
2650 l = len;
2651 mr = address_space_translate(as, addr, &addr1, &l, true);
2654 return result;
2657 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2658 const uint8_t *buf, int len)
2660 hwaddr l;
2661 hwaddr addr1;
2662 MemoryRegion *mr;
2663 MemTxResult result = MEMTX_OK;
2665 if (len > 0) {
2666 rcu_read_lock();
2667 l = len;
2668 mr = address_space_translate(as, addr, &addr1, &l, true);
2669 result = address_space_write_continue(as, addr, attrs, buf, len,
2670 addr1, l, mr);
2671 rcu_read_unlock();
2674 return result;
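/* A small usage sketch for address_space_write() above: a device model
 * pushing a buffer into guest memory and checking the transaction result.
 * The wrapper name, guest address and payload are hypothetical.
 */
static bool example_write_guest_buffer(AddressSpace *as, hwaddr gpa,
                                       const uint8_t *data, int size)
{
    MemTxResult res = address_space_write(as, gpa, MEMTXATTRS_UNSPECIFIED,
                                          data, size);
    return res == MEMTX_OK;
}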
2677 /* Called within RCU critical section. */
2678 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2679 MemTxAttrs attrs, uint8_t *buf,
2680 int len, hwaddr addr1, hwaddr l,
2681 MemoryRegion *mr)
2683 uint8_t *ptr;
2684 uint64_t val;
2685 MemTxResult result = MEMTX_OK;
2686 bool release_lock = false;
2688 for (;;) {
2689 if (!memory_access_is_direct(mr, false)) {
2690 /* I/O case */
2691 release_lock |= prepare_mmio_access(mr);
2692 l = memory_access_size(mr, l, addr1);
2693 switch (l) {
2694 case 8:
2695 /* 64 bit read access */
2696 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2697 attrs);
2698 stq_p(buf, val);
2699 break;
2700 case 4:
2701 /* 32 bit read access */
2702 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2703 attrs);
2704 stl_p(buf, val);
2705 break;
2706 case 2:
2707 /* 16 bit read access */
2708 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2709 attrs);
2710 stw_p(buf, val);
2711 break;
2712 case 1:
2713 /* 8 bit read access */
2714 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2715 attrs);
2716 stb_p(buf, val);
2717 break;
2718 default:
2719 abort();
2721 } else {
2722 /* RAM case */
2723 ptr = qemu_get_ram_ptr(mr->ram_block,
2724 memory_region_get_ram_addr(mr) + addr1);
2725 memcpy(buf, ptr, l);
2728 if (release_lock) {
2729 qemu_mutex_unlock_iothread();
2730 release_lock = false;
2733 len -= l;
2734 buf += l;
2735 addr += l;
2737 if (!len) {
2738 break;
2741 l = len;
2742 mr = address_space_translate(as, addr, &addr1, &l, false);
2745 return result;
2748 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2749 MemTxAttrs attrs, uint8_t *buf, int len)
2751 hwaddr l;
2752 hwaddr addr1;
2753 MemoryRegion *mr;
2754 MemTxResult result = MEMTX_OK;
2756 if (len > 0) {
2757 rcu_read_lock();
2758 l = len;
2759 mr = address_space_translate(as, addr, &addr1, &l, false);
2760 result = address_space_read_continue(as, addr, attrs, buf, len,
2761 addr1, l, mr);
2762 rcu_read_unlock();
2765 return result;
2768 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2769 uint8_t *buf, int len, bool is_write)
2771 if (is_write) {
2772 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2773 } else {
2774 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2778 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2779 int len, int is_write)
2781 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2782 buf, len, is_write);
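/* The legacy helper above defaults to the system address space and
 * MEMTXATTRS_UNSPECIFIED, discarding the transaction result.  A minimal
 * sketch of a device model using it for a DMA-style copy into guest RAM;
 * the guest address and buffer are illustrative.
 */
static void example_dma_copy_to_guest(hwaddr gpa, const uint8_t *buf, int len)
{
    cpu_physical_memory_rw(gpa, (uint8_t *)buf, len, 1);
}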
2785 enum write_rom_type {
2786 WRITE_DATA,
2787 FLUSH_CACHE,
2790 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2791 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2793 hwaddr l;
2794 uint8_t *ptr;
2795 hwaddr addr1;
2796 MemoryRegion *mr;
2798 rcu_read_lock();
2799 while (len > 0) {
2800 l = len;
2801 mr = address_space_translate(as, addr, &addr1, &l, true);
2803 if (!(memory_region_is_ram(mr) ||
2804 memory_region_is_romd(mr))) {
2805 l = memory_access_size(mr, l, addr1);
2806 } else {
2807 addr1 += memory_region_get_ram_addr(mr);
2808 /* ROM/RAM case */
2809 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2810 switch (type) {
2811 case WRITE_DATA:
2812 memcpy(ptr, buf, l);
2813 invalidate_and_set_dirty(mr, addr1, l);
2814 break;
2815 case FLUSH_CACHE:
2816 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2817 break;
2820 len -= l;
2821 buf += l;
2822 addr += l;
2824 rcu_read_unlock();
2827 /* used for ROM loading: can write in RAM and ROM */
2828 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2829 const uint8_t *buf, int len)
2831 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2834 void cpu_flush_icache_range(hwaddr start, int len)
2837 * This function should do the same thing as an icache flush that was
2838 * triggered from within the guest. For TCG we are always cache coherent,
2839 * so there is no need to flush anything. For KVM / Xen we need to flush
2840 * the host's instruction cache at least.
2842 if (tcg_enabled()) {
2843 return;
2846 cpu_physical_memory_write_rom_internal(&address_space_memory,
2847 start, NULL, len, FLUSH_CACHE);
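/* A short sketch combining the two helpers above, as a ROM loader or
 * firmware-patching path might: write the image through the ROM-capable
 * path, then make sure the host instruction cache is coherent for the
 * accelerators that need it.  The load address and image are hypothetical.
 */
static void example_load_firmware_blob(AddressSpace *as, hwaddr load_addr,
                                       const uint8_t *image, int size)
{
    cpu_physical_memory_write_rom(as, load_addr, image, size);
    cpu_flush_icache_range(load_addr, size);
}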
2850 typedef struct {
2851 MemoryRegion *mr;
2852 void *buffer;
2853 hwaddr addr;
2854 hwaddr len;
2855 bool in_use;
2856 } BounceBuffer;
2858 static BounceBuffer bounce;
2860 typedef struct MapClient {
2861 QEMUBH *bh;
2862 QLIST_ENTRY(MapClient) link;
2863 } MapClient;
2865 QemuMutex map_client_list_lock;
2866 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2867 = QLIST_HEAD_INITIALIZER(map_client_list);
2869 static void cpu_unregister_map_client_do(MapClient *client)
2871 QLIST_REMOVE(client, link);
2872 g_free(client);
2875 static void cpu_notify_map_clients_locked(void)
2877 MapClient *client;
2879 while (!QLIST_EMPTY(&map_client_list)) {
2880 client = QLIST_FIRST(&map_client_list);
2881 qemu_bh_schedule(client->bh);
2882 cpu_unregister_map_client_do(client);
2886 void cpu_register_map_client(QEMUBH *bh)
2888 MapClient *client = g_malloc(sizeof(*client));
2890 qemu_mutex_lock(&map_client_list_lock);
2891 client->bh = bh;
2892 QLIST_INSERT_HEAD(&map_client_list, client, link);
2893 if (!atomic_read(&bounce.in_use)) {
2894 cpu_notify_map_clients_locked();
2896 qemu_mutex_unlock(&map_client_list_lock);
2899 void cpu_exec_init_all(void)
2901 qemu_mutex_init(&ram_list.mutex);
2902 io_mem_init();
2903 memory_map_init();
2904 qemu_mutex_init(&map_client_list_lock);
2907 void cpu_unregister_map_client(QEMUBH *bh)
2909 MapClient *client;
2911 qemu_mutex_lock(&map_client_list_lock);
2912 QLIST_FOREACH(client, &map_client_list, link) {
2913 if (client->bh == bh) {
2914 cpu_unregister_map_client_do(client);
2915 break;
2918 qemu_mutex_unlock(&map_client_list_lock);
2921 static void cpu_notify_map_clients(void)
2923 qemu_mutex_lock(&map_client_list_lock);
2924 cpu_notify_map_clients_locked();
2925 qemu_mutex_unlock(&map_client_list_lock);
2928 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2930 MemoryRegion *mr;
2931 hwaddr l, xlat;
2933 rcu_read_lock();
2934 while (len > 0) {
2935 l = len;
2936 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2937 if (!memory_access_is_direct(mr, is_write)) {
2938 l = memory_access_size(mr, l, addr);
2939 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2940 return false;
2944 len -= l;
2945 addr += l;
2947 rcu_read_unlock();
2948 return true;
2951 /* Map a physical memory region into a host virtual address.
2952 * May map a subset of the requested range, given by and returned in *plen.
2953 * May return NULL if resources needed to perform the mapping are exhausted.
2954 * Use only for reads OR writes - not for read-modify-write operations.
2955 * Use cpu_register_map_client() to know when retrying the map operation is
2956 * likely to succeed.
2958 void *address_space_map(AddressSpace *as,
2959 hwaddr addr,
2960 hwaddr *plen,
2961 bool is_write)
2963 hwaddr len = *plen;
2964 hwaddr done = 0;
2965 hwaddr l, xlat, base;
2966 MemoryRegion *mr, *this_mr;
2967 ram_addr_t raddr;
2968 void *ptr;
2970 if (len == 0) {
2971 return NULL;
2974 l = len;
2975 rcu_read_lock();
2976 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2978 if (!memory_access_is_direct(mr, is_write)) {
2979 if (atomic_xchg(&bounce.in_use, true)) {
2980 rcu_read_unlock();
2981 return NULL;
2983 /* Avoid unbounded allocations */
2984 l = MIN(l, TARGET_PAGE_SIZE);
2985 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2986 bounce.addr = addr;
2987 bounce.len = l;
2989 memory_region_ref(mr);
2990 bounce.mr = mr;
2991 if (!is_write) {
2992 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2993 bounce.buffer, l);
2996 rcu_read_unlock();
2997 *plen = l;
2998 return bounce.buffer;
3001 base = xlat;
3002 raddr = memory_region_get_ram_addr(mr);
3004 for (;;) {
3005 len -= l;
3006 addr += l;
3007 done += l;
3008 if (len == 0) {
3009 break;
3012 l = len;
3013 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3014 if (this_mr != mr || xlat != base + done) {
3015 break;
3019 memory_region_ref(mr);
3020 *plen = done;
3021 ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3022 rcu_read_unlock();
3024 return ptr;
3027 /* Unmaps a memory region previously mapped by address_space_map().
3028 * Will also mark the memory as dirty if is_write == 1. access_len gives
3029 * the amount of memory that was actually read or written by the caller.
3031 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3032 int is_write, hwaddr access_len)
3034 if (buffer != bounce.buffer) {
3035 MemoryRegion *mr;
3036 ram_addr_t addr1;
3038 mr = qemu_ram_addr_from_host(buffer, &addr1);
3039 assert(mr != NULL);
3040 if (is_write) {
3041 invalidate_and_set_dirty(mr, addr1, access_len);
3043 if (xen_enabled()) {
3044 xen_invalidate_map_cache_entry(buffer);
3046 memory_region_unref(mr);
3047 return;
3049 if (is_write) {
3050 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3051 bounce.buffer, access_len);
3053 qemu_vfree(bounce.buffer);
3054 bounce.buffer = NULL;
3055 memory_region_unref(bounce.mr);
3056 atomic_mb_set(&bounce.in_use, false);
3057 cpu_notify_map_clients();
3060 void *cpu_physical_memory_map(hwaddr addr,
3061 hwaddr *plen,
3062 int is_write)
3064 return address_space_map(&address_space_memory, addr, plen, is_write);
3067 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3068 int is_write, hwaddr access_len)
3070 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
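/* A condensed sketch of the zero-copy DMA pattern built on the functions
 * above: map a guest range, operate on the returned host pointer, then unmap
 * with the number of bytes actually touched.  If the mapping falls back to
 * the (busy) bounce buffer and returns NULL, cpu_register_map_client() can
 * be used to learn when a retry is likely to succeed.  The fill operation
 * and function name here are illustrative.
 */
static bool example_fill_guest_range(AddressSpace *as, hwaddr gpa, hwaddr size)
{
    hwaddr mapped = size;
    void *host = address_space_map(as, gpa, &mapped, true);

    if (!host) {
        return false;   /* caller could retry via cpu_register_map_client() */
    }
    memset(host, 0, mapped);
    address_space_unmap(as, host, mapped, true, mapped);
    return mapped == size;
}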
3073 /* warning: addr must be aligned */
3074 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3075 MemTxAttrs attrs,
3076 MemTxResult *result,
3077 enum device_endian endian)
3079 uint8_t *ptr;
3080 uint64_t val;
3081 MemoryRegion *mr;
3082 hwaddr l = 4;
3083 hwaddr addr1;
3084 MemTxResult r;
3085 bool release_lock = false;
3087 rcu_read_lock();
3088 mr = address_space_translate(as, addr, &addr1, &l, false);
3089 if (l < 4 || !memory_access_is_direct(mr, false)) {
3090 release_lock |= prepare_mmio_access(mr);
3092 /* I/O case */
3093 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3094 #if defined(TARGET_WORDS_BIGENDIAN)
3095 if (endian == DEVICE_LITTLE_ENDIAN) {
3096 val = bswap32(val);
3098 #else
3099 if (endian == DEVICE_BIG_ENDIAN) {
3100 val = bswap32(val);
3102 #endif
3103 } else {
3104 /* RAM case */
3105 ptr = qemu_get_ram_ptr(mr->ram_block,
3106 (memory_region_get_ram_addr(mr)
3107 & TARGET_PAGE_MASK)
3108 + addr1);
3109 switch (endian) {
3110 case DEVICE_LITTLE_ENDIAN:
3111 val = ldl_le_p(ptr);
3112 break;
3113 case DEVICE_BIG_ENDIAN:
3114 val = ldl_be_p(ptr);
3115 break;
3116 default:
3117 val = ldl_p(ptr);
3118 break;
3120 r = MEMTX_OK;
3122 if (result) {
3123 *result = r;
3125 if (release_lock) {
3126 qemu_mutex_unlock_iothread();
3128 rcu_read_unlock();
3129 return val;
3132 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3133 MemTxAttrs attrs, MemTxResult *result)
3135 return address_space_ldl_internal(as, addr, attrs, result,
3136 DEVICE_NATIVE_ENDIAN);
3139 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3140 MemTxAttrs attrs, MemTxResult *result)
3142 return address_space_ldl_internal(as, addr, attrs, result,
3143 DEVICE_LITTLE_ENDIAN);
3146 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3147 MemTxAttrs attrs, MemTxResult *result)
3149 return address_space_ldl_internal(as, addr, attrs, result,
3150 DEVICE_BIG_ENDIAN);
3153 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3155 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3158 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3160 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3163 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3165 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3168 /* warning: addr must be aligned */
3169 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3170 MemTxAttrs attrs,
3171 MemTxResult *result,
3172 enum device_endian endian)
3174 uint8_t *ptr;
3175 uint64_t val;
3176 MemoryRegion *mr;
3177 hwaddr l = 8;
3178 hwaddr addr1;
3179 MemTxResult r;
3180 bool release_lock = false;
3182 rcu_read_lock();
3183 mr = address_space_translate(as, addr, &addr1, &l,
3184 false);
3185 if (l < 8 || !memory_access_is_direct(mr, false)) {
3186 release_lock |= prepare_mmio_access(mr);
3188 /* I/O case */
3189 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3190 #if defined(TARGET_WORDS_BIGENDIAN)
3191 if (endian == DEVICE_LITTLE_ENDIAN) {
3192 val = bswap64(val);
3194 #else
3195 if (endian == DEVICE_BIG_ENDIAN) {
3196 val = bswap64(val);
3198 #endif
3199 } else {
3200 /* RAM case */
3201 ptr = qemu_get_ram_ptr(mr->ram_block,
3202 (memory_region_get_ram_addr(mr)
3203 & TARGET_PAGE_MASK)
3204 + addr1);
3205 switch (endian) {
3206 case DEVICE_LITTLE_ENDIAN:
3207 val = ldq_le_p(ptr);
3208 break;
3209 case DEVICE_BIG_ENDIAN:
3210 val = ldq_be_p(ptr);
3211 break;
3212 default:
3213 val = ldq_p(ptr);
3214 break;
3216 r = MEMTX_OK;
3218 if (result) {
3219 *result = r;
3221 if (release_lock) {
3222 qemu_mutex_unlock_iothread();
3224 rcu_read_unlock();
3225 return val;
3228 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3229 MemTxAttrs attrs, MemTxResult *result)
3231 return address_space_ldq_internal(as, addr, attrs, result,
3232 DEVICE_NATIVE_ENDIAN);
3235 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3236 MemTxAttrs attrs, MemTxResult *result)
3238 return address_space_ldq_internal(as, addr, attrs, result,
3239 DEVICE_LITTLE_ENDIAN);
3242 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3243 MemTxAttrs attrs, MemTxResult *result)
3245 return address_space_ldq_internal(as, addr, attrs, result,
3246 DEVICE_BIG_ENDIAN);
3249 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3251 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3254 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3256 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3259 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3261 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3264 /* XXX: optimize */
3265 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3266 MemTxAttrs attrs, MemTxResult *result)
3268 uint8_t val;
3269 MemTxResult r;
3271 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3272 if (result) {
3273 *result = r;
3275 return val;
3278 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3280 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3283 /* warning: addr must be aligned */
3284 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3285 hwaddr addr,
3286 MemTxAttrs attrs,
3287 MemTxResult *result,
3288 enum device_endian endian)
3290 uint8_t *ptr;
3291 uint64_t val;
3292 MemoryRegion *mr;
3293 hwaddr l = 2;
3294 hwaddr addr1;
3295 MemTxResult r;
3296 bool release_lock = false;
3298 rcu_read_lock();
3299 mr = address_space_translate(as, addr, &addr1, &l,
3300 false);
3301 if (l < 2 || !memory_access_is_direct(mr, false)) {
3302 release_lock |= prepare_mmio_access(mr);
3304 /* I/O case */
3305 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3306 #if defined(TARGET_WORDS_BIGENDIAN)
3307 if (endian == DEVICE_LITTLE_ENDIAN) {
3308 val = bswap16(val);
3310 #else
3311 if (endian == DEVICE_BIG_ENDIAN) {
3312 val = bswap16(val);
3314 #endif
3315 } else {
3316 /* RAM case */
3317 ptr = qemu_get_ram_ptr(mr->ram_block,
3318 (memory_region_get_ram_addr(mr)
3319 & TARGET_PAGE_MASK)
3320 + addr1);
3321 switch (endian) {
3322 case DEVICE_LITTLE_ENDIAN:
3323 val = lduw_le_p(ptr);
3324 break;
3325 case DEVICE_BIG_ENDIAN:
3326 val = lduw_be_p(ptr);
3327 break;
3328 default:
3329 val = lduw_p(ptr);
3330 break;
3332 r = MEMTX_OK;
3334 if (result) {
3335 *result = r;
3337 if (release_lock) {
3338 qemu_mutex_unlock_iothread();
3340 rcu_read_unlock();
3341 return val;
3344 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3345 MemTxAttrs attrs, MemTxResult *result)
3347 return address_space_lduw_internal(as, addr, attrs, result,
3348 DEVICE_NATIVE_ENDIAN);
3351 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3352 MemTxAttrs attrs, MemTxResult *result)
3354 return address_space_lduw_internal(as, addr, attrs, result,
3355 DEVICE_LITTLE_ENDIAN);
3358 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3359 MemTxAttrs attrs, MemTxResult *result)
3361 return address_space_lduw_internal(as, addr, attrs, result,
3362 DEVICE_BIG_ENDIAN);
3365 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3367 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3370 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3372 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3375 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3377 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3380 /* warning: addr must be aligned. The ram page is not marked as dirty
3381 and the code inside is not invalidated. It is useful if the dirty
3382 bits are used to track modified PTEs */
3383 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3384 MemTxAttrs attrs, MemTxResult *result)
3386 uint8_t *ptr;
3387 MemoryRegion *mr;
3388 hwaddr l = 4;
3389 hwaddr addr1;
3390 MemTxResult r;
3391 uint8_t dirty_log_mask;
3392 bool release_lock = false;
3394 rcu_read_lock();
3395 mr = address_space_translate(as, addr, &addr1, &l,
3396 true);
3397 if (l < 4 || !memory_access_is_direct(mr, true)) {
3398 release_lock |= prepare_mmio_access(mr);
3400 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3401 } else {
3402 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3403 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3404 stl_p(ptr, val);
3406 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3407 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3408 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3409 r = MEMTX_OK;
3411 if (result) {
3412 *result = r;
3414 if (release_lock) {
3415 qemu_mutex_unlock_iothread();
3417 rcu_read_unlock();
3420 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3422 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
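/* A tiny sketch of the intended use of stl_phys_notdirty() above: target MMU
 * emulation updating an accessed/dirty bit in a guest page table entry
 * without marking the page itself dirty, so that dirty-bit tracking of PTE
 * pages keeps working.  The PTE address and bit layout are hypothetical.
 */
static void example_set_pte_accessed_bit(AddressSpace *as, hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(as, pte_addr);

    stl_phys_notdirty(as, pte_addr, pte | 0x20 /* hypothetical ACCESSED bit */);
}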
3425 /* warning: addr must be aligned */
3426 static inline void address_space_stl_internal(AddressSpace *as,
3427 hwaddr addr, uint32_t val,
3428 MemTxAttrs attrs,
3429 MemTxResult *result,
3430 enum device_endian endian)
3432 uint8_t *ptr;
3433 MemoryRegion *mr;
3434 hwaddr l = 4;
3435 hwaddr addr1;
3436 MemTxResult r;
3437 bool release_lock = false;
3439 rcu_read_lock();
3440 mr = address_space_translate(as, addr, &addr1, &l,
3441 true);
3442 if (l < 4 || !memory_access_is_direct(mr, true)) {
3443 release_lock |= prepare_mmio_access(mr);
3445 #if defined(TARGET_WORDS_BIGENDIAN)
3446 if (endian == DEVICE_LITTLE_ENDIAN) {
3447 val = bswap32(val);
3449 #else
3450 if (endian == DEVICE_BIG_ENDIAN) {
3451 val = bswap32(val);
3453 #endif
3454 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3455 } else {
3456 /* RAM case */
3457 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3458 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3459 switch (endian) {
3460 case DEVICE_LITTLE_ENDIAN:
3461 stl_le_p(ptr, val);
3462 break;
3463 case DEVICE_BIG_ENDIAN:
3464 stl_be_p(ptr, val);
3465 break;
3466 default:
3467 stl_p(ptr, val);
3468 break;
3470 invalidate_and_set_dirty(mr, addr1, 4);
3471 r = MEMTX_OK;
3473 if (result) {
3474 *result = r;
3476 if (release_lock) {
3477 qemu_mutex_unlock_iothread();
3479 rcu_read_unlock();
3482 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3483 MemTxAttrs attrs, MemTxResult *result)
3485 address_space_stl_internal(as, addr, val, attrs, result,
3486 DEVICE_NATIVE_ENDIAN);
3489 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3490 MemTxAttrs attrs, MemTxResult *result)
3492 address_space_stl_internal(as, addr, val, attrs, result,
3493 DEVICE_LITTLE_ENDIAN);
3496 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3497 MemTxAttrs attrs, MemTxResult *result)
3499 address_space_stl_internal(as, addr, val, attrs, result,
3500 DEVICE_BIG_ENDIAN);
3503 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3505 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3508 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3510 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3513 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3515 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
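/* A small sketch showing why the explicit-endian store helpers above exist:
 * device models frequently have to write descriptors that are defined as
 * little-endian in guest memory regardless of the target's native byte
 * order.  The descriptor layout used here is hypothetical.
 */
static void example_write_le_descriptor(AddressSpace *as, hwaddr desc_gpa,
                                        uint32_t buf_addr, uint32_t buf_len)
{
    stl_le_phys(as, desc_gpa, buf_addr);
    stl_le_phys(as, desc_gpa + 4, buf_len);
}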
3518 /* XXX: optimize */
3519 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3520 MemTxAttrs attrs, MemTxResult *result)
3522 uint8_t v = val;
3523 MemTxResult r;
3525 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3526 if (result) {
3527 *result = r;
3531 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3533 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3536 /* warning: addr must be aligned */
3537 static inline void address_space_stw_internal(AddressSpace *as,
3538 hwaddr addr, uint32_t val,
3539 MemTxAttrs attrs,
3540 MemTxResult *result,
3541 enum device_endian endian)
3543 uint8_t *ptr;
3544 MemoryRegion *mr;
3545 hwaddr l = 2;
3546 hwaddr addr1;
3547 MemTxResult r;
3548 bool release_lock = false;
3550 rcu_read_lock();
3551 mr = address_space_translate(as, addr, &addr1, &l, true);
3552 if (l < 2 || !memory_access_is_direct(mr, true)) {
3553 release_lock |= prepare_mmio_access(mr);
3555 #if defined(TARGET_WORDS_BIGENDIAN)
3556 if (endian == DEVICE_LITTLE_ENDIAN) {
3557 val = bswap16(val);
3559 #else
3560 if (endian == DEVICE_BIG_ENDIAN) {
3561 val = bswap16(val);
3563 #endif
3564 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3565 } else {
3566 /* RAM case */
3567 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3568 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3569 switch (endian) {
3570 case DEVICE_LITTLE_ENDIAN:
3571 stw_le_p(ptr, val);
3572 break;
3573 case DEVICE_BIG_ENDIAN:
3574 stw_be_p(ptr, val);
3575 break;
3576 default:
3577 stw_p(ptr, val);
3578 break;
3580 invalidate_and_set_dirty(mr, addr1, 2);
3581 r = MEMTX_OK;
3583 if (result) {
3584 *result = r;
3586 if (release_lock) {
3587 qemu_mutex_unlock_iothread();
3589 rcu_read_unlock();
3592 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3593 MemTxAttrs attrs, MemTxResult *result)
3595 address_space_stw_internal(as, addr, val, attrs, result,
3596 DEVICE_NATIVE_ENDIAN);
3599 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3600 MemTxAttrs attrs, MemTxResult *result)
3602 address_space_stw_internal(as, addr, val, attrs, result,
3603 DEVICE_LITTLE_ENDIAN);
3606 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3607 MemTxAttrs attrs, MemTxResult *result)
3609 address_space_stw_internal(as, addr, val, attrs, result,
3610 DEVICE_BIG_ENDIAN);
3613 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3615 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3618 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3620 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3623 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3625 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3628 /* XXX: optimize */
3629 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3630 MemTxAttrs attrs, MemTxResult *result)
3632 MemTxResult r;
3633 val = tswap64(val);
3634 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3635 if (result) {
3636 *result = r;
3640 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3641 MemTxAttrs attrs, MemTxResult *result)
3643 MemTxResult r;
3644 val = cpu_to_le64(val);
3645 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3646 if (result) {
3647 *result = r;
3650 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3651 MemTxAttrs attrs, MemTxResult *result)
3653 MemTxResult r;
3654 val = cpu_to_be64(val);
3655 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3656 if (result) {
3657 *result = r;
3661 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3663 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3666 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3668 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3671 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3673 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3676 /* virtual memory access for debug (includes writing to ROM) */
3677 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3678 uint8_t *buf, int len, int is_write)
3680 int l;
3681 hwaddr phys_addr;
3682 target_ulong page;
3684 while (len > 0) {
3685 int asidx;
3686 MemTxAttrs attrs;
3688 page = addr & TARGET_PAGE_MASK;
3689 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3690 asidx = cpu_asidx_from_attrs(cpu, attrs);
3691 /* if no physical page mapped, return an error */
3692 if (phys_addr == -1)
3693 return -1;
3694 l = (page + TARGET_PAGE_SIZE) - addr;
3695 if (l > len)
3696 l = len;
3697 phys_addr += (addr & ~TARGET_PAGE_MASK);
3698 if (is_write) {
3699 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3700 phys_addr, buf, l);
3701 } else {
3702 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3703 MEMTXATTRS_UNSPECIFIED,
3704 buf, l, 0);
3706 len -= l;
3707 buf += l;
3708 addr += l;
3710 return 0;
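/* A minimal sketch of a debugger-style consumer of cpu_memory_rw_debug()
 * above: reading guest-virtual memory on behalf of a monitor or gdbstub
 * command.  The wrapper name is illustrative.
 */
static int example_debug_read(CPUState *cpu, target_ulong vaddr,
                              uint8_t *out, int len)
{
    return cpu_memory_rw_debug(cpu, vaddr, out, len, 0);
}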
3714 * Allows code that needs to deal with migration bitmaps etc. to still be built
3715 * target-independent.
3717 size_t qemu_target_page_bits(void)
3719 return TARGET_PAGE_BITS;
3722 #endif
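/* A short sketch of how target-independent code (migration, for example)
 * can use qemu_target_page_bits() above to derive the guest page size
 * without pulling in target-specific headers.
 */
static size_t example_target_page_size(void)
{
    return (size_t)1 << qemu_target_page_bits();
}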
3725 * A helper function for the _utterly broken_ virtio device model to find out if
3726 * it's running on a big-endian machine. Don't do this at home, kids!
3728 bool target_words_bigendian(void);
3729 bool target_words_bigendian(void)
3731 #if defined(TARGET_WORDS_BIGENDIAN)
3732 return true;
3733 #else
3734 return false;
3735 #endif
3738 #ifndef CONFIG_USER_ONLY
3739 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3741 MemoryRegion *mr;
3742 hwaddr l = 1;
3743 bool res;
3745 rcu_read_lock();
3746 mr = address_space_translate(&address_space_memory,
3747 phys_addr, &phys_addr, &l, false);
3749 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3750 rcu_read_unlock();
3751 return res;
3754 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3756 RAMBlock *block;
3757 int ret = 0;
3759 rcu_read_lock();
3760 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3761 ret = func(block->idstr, block->host, block->offset,
3762 block->used_length, opaque);
3763 if (ret) {
3764 break;
3767 rcu_read_unlock();
3768 return ret;
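/* A compact sketch of a RAMBlockIterFunc callback for the iterator above,
 * summing the used length of every RAM block much as migration-related code
 * might.  The callback and accumulator names are illustrative, and the
 * parameter list mirrors the call made by qemu_ram_foreach_block() above.
 */
static int example_sum_ram_block(const char *idstr, void *host_addr,
                                 ram_addr_t offset, ram_addr_t length,
                                 void *opaque)
{
    uint64_t *total = opaque;

    *total += length;
    return 0;   /* keep iterating */
}

static uint64_t example_total_ram(void)
{
    uint64_t total = 0;

    qemu_ram_foreach_block(example_sum_ram_block, &total);
    return total;
}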
3770 #endif