exec: Fix memory allocation when memory path isn't on hugetlbfs
[qemu/kevin.git] / exec.c
blob274b619f888787a41109e0ffa1daec4bf318a64c
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #ifndef _WIN32
21 #include <sys/mman.h>
22 #endif
24 #include "qemu-common.h"
25 #include "cpu.h"
26 #include "tcg.h"
27 #include "hw/hw.h"
28 #if !defined(CONFIG_USER_ONLY)
29 #include "hw/boards.h"
30 #endif
31 #include "hw/qdev.h"
32 #include "sysemu/kvm.h"
33 #include "sysemu/sysemu.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
42 #include <qemu.h>
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
45 #include "trace.h"
46 #endif
47 #include "exec/cpu-all.h"
48 #include "qemu/rcu_queue.h"
49 #include "qemu/main-loop.h"
50 #include "translate-all.h"
51 #include "sysemu/replay.h"
53 #include "exec/memory-internal.h"
54 #include "exec/ram_addr.h"
55 #include "exec/log.h"
57 #include "qemu/range.h"
58 #ifndef _WIN32
59 #include "qemu/mmap-alloc.h"
60 #endif
62 //#define DEBUG_SUBPAGE
64 #if !defined(CONFIG_USER_ONLY)
65 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
66 * are protected by the ramlist lock.
68 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
70 static MemoryRegion *system_memory;
71 static MemoryRegion *system_io;
73 AddressSpace address_space_io;
74 AddressSpace address_space_memory;
76 MemoryRegion io_mem_rom, io_mem_notdirty;
77 static MemoryRegion io_mem_unassigned;
79 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
80 #define RAM_PREALLOC (1 << 0)
82 /* RAM is mmap-ed with MAP_SHARED */
83 #define RAM_SHARED (1 << 1)
85 /* Only a portion of RAM (used_length) is actually used, and migrated.
86 * This used_length size can change across reboots.
88 #define RAM_RESIZEABLE (1 << 2)
90 #endif
92 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
93 /* current CPU in the current thread. It is only valid inside
94 cpu_exec() */
95 __thread CPUState *current_cpu;
96 /* 0 = Do not count executed instructions.
97 1 = Precise instruction counting.
98 2 = Adaptive rate instruction counting. */
99 int use_icount;
101 #if !defined(CONFIG_USER_ONLY)
/* One slot of the physical page radix tree, packed into 32 bits. */
typedef struct PhysPageEntry {
    /* Number of levels to skip to reach the next node (in units of
     * L2_SIZE); 0 marks a leaf.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Interior (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
} PhysPageEntry;
112 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
114 /* Size of the L2 (and L3, etc) page tables. */
115 #define ADDR_SPACE_BITS 64
117 #define P_L2_BITS 9
118 #define P_L2_SIZE (1 << P_L2_BITS)
120 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
122 typedef PhysPageEntry Node[P_L2_SIZE];
124 typedef struct PhysPageMap {
125 struct rcu_head rcu;
127 unsigned sections_nb;
128 unsigned sections_nb_alloc;
129 unsigned nodes_nb;
130 unsigned nodes_nb_alloc;
131 Node *nodes;
132 MemoryRegionSection *sections;
133 } PhysPageMap;
135 struct AddressSpaceDispatch {
136 struct rcu_head rcu;
138 MemoryRegionSection *mru_section;
139 /* This is a multi-level map on the physical address space.
140 * The bottom level has pointers to MemoryRegionSections.
142 PhysPageEntry phys_map;
143 PhysPageMap map;
144 AddressSpace *as;
147 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
148 typedef struct subpage_t {
149 MemoryRegion iomem;
150 AddressSpace *as;
151 hwaddr base;
152 uint16_t sub_section[TARGET_PAGE_SIZE];
153 } subpage_t;
155 #define PHYS_SECTION_UNASSIGNED 0
156 #define PHYS_SECTION_NOTDIRTY 1
157 #define PHYS_SECTION_ROM 2
158 #define PHYS_SECTION_WATCH 3
160 static void io_mem_init(void);
161 static void memory_map_init(void);
162 static void tcg_commit(MemoryListener *listener);
164 static MemoryRegion io_mem_watch;
167 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
168 * @cpu: the CPU whose AddressSpace this is
169 * @as: the AddressSpace itself
170 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
171 * @tcg_as_listener: listener for tracking changes to the AddressSpace
173 struct CPUAddressSpace {
174 CPUState *cpu;
175 AddressSpace *as;
176 struct AddressSpaceDispatch *memory_dispatch;
177 MemoryListener tcg_as_listener;
180 #endif
182 #if !defined(CONFIG_USER_ONLY)
184 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
186 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
187 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
188 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
189 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
193 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
195 unsigned i;
196 uint32_t ret;
197 PhysPageEntry e;
198 PhysPageEntry *p;
200 ret = map->nodes_nb++;
201 p = map->nodes[ret];
202 assert(ret != PHYS_MAP_NODE_NIL);
203 assert(ret != map->nodes_nb_alloc);
205 e.skip = leaf ? 0 : 1;
206 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
207 for (i = 0; i < P_L2_SIZE; ++i) {
208 memcpy(&p[i], &e, sizeof(e));
210 return ret;
213 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
214 hwaddr *index, hwaddr *nb, uint16_t leaf,
215 int level)
217 PhysPageEntry *p;
218 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
220 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
221 lp->ptr = phys_map_node_alloc(map, level == 0);
223 p = map->nodes[lp->ptr];
224 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
226 while (*nb && lp < &p[P_L2_SIZE]) {
227 if ((*index & (step - 1)) == 0 && *nb >= step) {
228 lp->skip = 0;
229 lp->ptr = leaf;
230 *index += step;
231 *nb -= step;
232 } else {
233 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
235 ++lp;
239 static void phys_page_set(AddressSpaceDispatch *d,
240 hwaddr index, hwaddr nb,
241 uint16_t leaf)
243 /* Wildly overreserve - it doesn't matter much. */
244 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
246 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
249 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
250 * and update our entry so we can skip it and go directly to the destination.
252 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
254 unsigned valid_ptr = P_L2_SIZE;
255 int valid = 0;
256 PhysPageEntry *p;
257 int i;
259 if (lp->ptr == PHYS_MAP_NODE_NIL) {
260 return;
263 p = nodes[lp->ptr];
264 for (i = 0; i < P_L2_SIZE; i++) {
265 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
266 continue;
269 valid_ptr = i;
270 valid++;
271 if (p[i].skip) {
272 phys_page_compact(&p[i], nodes, compacted);
276 /* We can only compress if there's only one child. */
277 if (valid != 1) {
278 return;
281 assert(valid_ptr < P_L2_SIZE);
283 /* Don't compress if it won't fit in the # of bits we have. */
284 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
285 return;
288 lp->ptr = p[valid_ptr].ptr;
289 if (!p[valid_ptr].skip) {
290 /* If our only child is a leaf, make this a leaf. */
291 /* By design, we should have made this node a leaf to begin with so we
292 * should never reach here.
293 * But since it's so simple to handle this, let's do it just in case we
294 * change this rule.
296 lp->skip = 0;
297 } else {
298 lp->skip += p[valid_ptr].skip;
302 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
304 DECLARE_BITMAP(compacted, nodes_nb);
306 if (d->phys_map.skip) {
307 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
311 static inline bool section_covers_addr(const MemoryRegionSection *section,
312 hwaddr addr)
314 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
315 * the section must cover the entire address space.
317 return section->size.hi ||
318 range_covers_byte(section->offset_within_address_space,
319 section->size.lo, addr);
322 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
323 Node *nodes, MemoryRegionSection *sections)
325 PhysPageEntry *p;
326 hwaddr index = addr >> TARGET_PAGE_BITS;
327 int i;
329 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
330 if (lp.ptr == PHYS_MAP_NODE_NIL) {
331 return &sections[PHYS_SECTION_UNASSIGNED];
333 p = nodes[lp.ptr];
334 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
337 if (section_covers_addr(&sections[lp.ptr], addr)) {
338 return &sections[lp.ptr];
339 } else {
340 return &sections[PHYS_SECTION_UNASSIGNED];
344 bool memory_region_is_unassigned(MemoryRegion *mr)
346 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
347 && mr != &io_mem_watch;
350 /* Called from RCU critical section */
351 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
352 hwaddr addr,
353 bool resolve_subpage)
355 MemoryRegionSection *section = atomic_read(&d->mru_section);
356 subpage_t *subpage;
357 bool update;
359 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
360 section_covers_addr(section, addr)) {
361 update = false;
362 } else {
363 section = phys_page_find(d->phys_map, addr, d->map.nodes,
364 d->map.sections);
365 update = true;
367 if (resolve_subpage && section->mr->subpage) {
368 subpage = container_of(section->mr, subpage_t, iomem);
369 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
371 if (update) {
372 atomic_set(&d->mru_section, section);
374 return section;
377 /* Called from RCU critical section */
378 static MemoryRegionSection *
379 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
380 hwaddr *plen, bool resolve_subpage)
382 MemoryRegionSection *section;
383 MemoryRegion *mr;
384 Int128 diff;
386 section = address_space_lookup_region(d, addr, resolve_subpage);
387 /* Compute offset within MemoryRegionSection */
388 addr -= section->offset_within_address_space;
390 /* Compute offset within MemoryRegion */
391 *xlat = addr + section->offset_within_region;
393 mr = section->mr;
395 /* MMIO registers can be expected to perform full-width accesses based only
396 * on their address, without considering adjacent registers that could
397 * decode to completely different MemoryRegions. When such registers
398 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
399 * regions overlap wildly. For this reason we cannot clamp the accesses
400 * here.
402 * If the length is small (as is the case for address_space_ldl/stl),
403 * everything works fine. If the incoming length is large, however,
404 * the caller really has to do the clamping through memory_access_size.
406 if (memory_region_is_ram(mr)) {
407 diff = int128_sub(section->size, int128_make64(addr));
408 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
410 return section;
413 /* Called from RCU critical section */
414 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
415 hwaddr *xlat, hwaddr *plen,
416 bool is_write)
418 IOMMUTLBEntry iotlb;
419 MemoryRegionSection *section;
420 MemoryRegion *mr;
422 for (;;) {
423 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
424 section = address_space_translate_internal(d, addr, &addr, plen, true);
425 mr = section->mr;
427 if (!mr->iommu_ops) {
428 break;
431 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
432 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
433 | (addr & iotlb.addr_mask));
434 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
435 if (!(iotlb.perm & (1 << is_write))) {
436 mr = &io_mem_unassigned;
437 break;
440 as = iotlb.target_as;
443 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
444 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
445 *plen = MIN(page, *plen);
448 *xlat = addr;
449 return mr;
452 /* Called from RCU critical section */
453 MemoryRegionSection *
454 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
455 hwaddr *xlat, hwaddr *plen)
457 MemoryRegionSection *section;
458 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
460 section = address_space_translate_internal(d, addr, xlat, plen, false);
462 assert(!section->mr->iommu_ops);
463 return section;
465 #endif
467 #if !defined(CONFIG_USER_ONLY)
469 static int cpu_common_post_load(void *opaque, int version_id)
471 CPUState *cpu = opaque;
473 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
474 version_id is increased. */
475 cpu->interrupt_request &= ~0x01;
476 tlb_flush(cpu, 1);
478 return 0;
481 static int cpu_common_pre_load(void *opaque)
483 CPUState *cpu = opaque;
485 cpu->exception_index = -1;
487 return 0;
490 static bool cpu_common_exception_index_needed(void *opaque)
492 CPUState *cpu = opaque;
494 return tcg_enabled() && cpu->exception_index != -1;
497 static const VMStateDescription vmstate_cpu_common_exception_index = {
498 .name = "cpu_common/exception_index",
499 .version_id = 1,
500 .minimum_version_id = 1,
501 .needed = cpu_common_exception_index_needed,
502 .fields = (VMStateField[]) {
503 VMSTATE_INT32(exception_index, CPUState),
504 VMSTATE_END_OF_LIST()
508 static bool cpu_common_crash_occurred_needed(void *opaque)
510 CPUState *cpu = opaque;
512 return cpu->crash_occurred;
515 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
516 .name = "cpu_common/crash_occurred",
517 .version_id = 1,
518 .minimum_version_id = 1,
519 .needed = cpu_common_crash_occurred_needed,
520 .fields = (VMStateField[]) {
521 VMSTATE_BOOL(crash_occurred, CPUState),
522 VMSTATE_END_OF_LIST()
526 const VMStateDescription vmstate_cpu_common = {
527 .name = "cpu_common",
528 .version_id = 1,
529 .minimum_version_id = 1,
530 .pre_load = cpu_common_pre_load,
531 .post_load = cpu_common_post_load,
532 .fields = (VMStateField[]) {
533 VMSTATE_UINT32(halted, CPUState),
534 VMSTATE_UINT32(interrupt_request, CPUState),
535 VMSTATE_END_OF_LIST()
537 .subsections = (const VMStateDescription*[]) {
538 &vmstate_cpu_common_exception_index,
539 &vmstate_cpu_common_crash_occurred,
540 NULL
544 #endif
546 CPUState *qemu_get_cpu(int index)
548 CPUState *cpu;
550 CPU_FOREACH(cpu) {
551 if (cpu->cpu_index == index) {
552 return cpu;
556 return NULL;
559 #if !defined(CONFIG_USER_ONLY)
560 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
562 CPUAddressSpace *newas;
564 /* Target code should have set num_ases before calling us */
565 assert(asidx < cpu->num_ases);
567 if (asidx == 0) {
568 /* address space 0 gets the convenience alias */
569 cpu->as = as;
572 /* KVM cannot currently support multiple address spaces. */
573 assert(asidx == 0 || !kvm_enabled());
575 if (!cpu->cpu_ases) {
576 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
579 newas = &cpu->cpu_ases[asidx];
580 newas->cpu = cpu;
581 newas->as = as;
582 if (tcg_enabled()) {
583 newas->tcg_as_listener.commit = tcg_commit;
584 memory_listener_register(&newas->tcg_as_listener, as);
588 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
590 /* Return the AddressSpace corresponding to the specified index */
591 return cpu->cpu_ases[asidx].as;
593 #endif
595 #ifndef CONFIG_USER_ONLY
596 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
598 static int cpu_get_free_index(Error **errp)
600 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
602 if (cpu >= MAX_CPUMASK_BITS) {
603 error_setg(errp, "Trying to use more CPUs than max of %d",
604 MAX_CPUMASK_BITS);
605 return -1;
608 bitmap_set(cpu_index_map, cpu, 1);
609 return cpu;
612 void cpu_exec_exit(CPUState *cpu)
614 if (cpu->cpu_index == -1) {
615 /* cpu_index was never allocated by this @cpu or was already freed. */
616 return;
619 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
620 cpu->cpu_index = -1;
622 #else
624 static int cpu_get_free_index(Error **errp)
626 CPUState *some_cpu;
627 int cpu_index = 0;
629 CPU_FOREACH(some_cpu) {
630 cpu_index++;
632 return cpu_index;
635 void cpu_exec_exit(CPUState *cpu)
638 #endif
640 void cpu_exec_init(CPUState *cpu, Error **errp)
642 CPUClass *cc = CPU_GET_CLASS(cpu);
643 int cpu_index;
644 Error *local_err = NULL;
646 cpu->as = NULL;
647 cpu->num_ases = 0;
649 #ifndef CONFIG_USER_ONLY
650 cpu->thread_id = qemu_get_thread_id();
652 /* This is a softmmu CPU object, so create a property for it
653 * so users can wire up its memory. (This can't go in qom/cpu.c
654 * because that file is compiled only once for both user-mode
655 * and system builds.) The default if no link is set up is to use
656 * the system address space.
658 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
659 (Object **)&cpu->memory,
660 qdev_prop_allow_set_link_before_realize,
661 OBJ_PROP_LINK_UNREF_ON_RELEASE,
662 &error_abort);
663 cpu->memory = system_memory;
664 object_ref(OBJECT(cpu->memory));
665 #endif
667 #if defined(CONFIG_USER_ONLY)
668 cpu_list_lock();
669 #endif
670 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
671 if (local_err) {
672 error_propagate(errp, local_err);
673 #if defined(CONFIG_USER_ONLY)
674 cpu_list_unlock();
675 #endif
676 return;
678 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
679 #if defined(CONFIG_USER_ONLY)
680 cpu_list_unlock();
681 #endif
682 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
683 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
685 if (cc->vmsd != NULL) {
686 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
690 #if defined(CONFIG_USER_ONLY)
691 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
693 tb_invalidate_phys_page_range(pc, pc + 1, 0);
695 #else
696 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
698 MemTxAttrs attrs;
699 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
700 int asidx = cpu_asidx_from_attrs(cpu, attrs);
701 if (phys != -1) {
702 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
703 phys | (pc & ~TARGET_PAGE_MASK));
706 #endif
708 #if defined(CONFIG_USER_ONLY)
709 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
714 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
715 int flags)
717 return -ENOSYS;
720 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
724 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
725 int flags, CPUWatchpoint **watchpoint)
727 return -ENOSYS;
729 #else
730 /* Add a watchpoint. */
731 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
732 int flags, CPUWatchpoint **watchpoint)
734 CPUWatchpoint *wp;
736 /* forbid ranges which are empty or run off the end of the address space */
737 if (len == 0 || (addr + len - 1) < addr) {
738 error_report("tried to set invalid watchpoint at %"
739 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
740 return -EINVAL;
742 wp = g_malloc(sizeof(*wp));
744 wp->vaddr = addr;
745 wp->len = len;
746 wp->flags = flags;
748 /* keep all GDB-injected watchpoints in front */
749 if (flags & BP_GDB) {
750 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
751 } else {
752 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
755 tlb_flush_page(cpu, addr);
757 if (watchpoint)
758 *watchpoint = wp;
759 return 0;
762 /* Remove a specific watchpoint. */
763 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
764 int flags)
766 CPUWatchpoint *wp;
768 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
769 if (addr == wp->vaddr && len == wp->len
770 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
771 cpu_watchpoint_remove_by_ref(cpu, wp);
772 return 0;
775 return -ENOENT;
778 /* Remove a specific watchpoint by reference. */
779 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
781 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
783 tlb_flush_page(cpu, watchpoint->vaddr);
785 g_free(watchpoint);
788 /* Remove all matching watchpoints. */
789 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
791 CPUWatchpoint *wp, *next;
793 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
794 if (wp->flags & mask) {
795 cpu_watchpoint_remove_by_ref(cpu, wp);
800 /* Return true if this watchpoint address matches the specified
801 * access (ie the address range covered by the watchpoint overlaps
802 * partially or completely with the address range covered by the
803 * access).
805 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
806 vaddr addr,
807 vaddr len)
809 /* We know the lengths are non-zero, but a little caution is
810 * required to avoid errors in the case where the range ends
811 * exactly at the top of the address space and so addr + len
812 * wraps round to zero.
814 vaddr wpend = wp->vaddr + wp->len - 1;
815 vaddr addrend = addr + len - 1;
817 return !(addr > wpend || wp->vaddr > addrend);
820 #endif
822 /* Add a breakpoint. */
823 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
824 CPUBreakpoint **breakpoint)
826 CPUBreakpoint *bp;
828 bp = g_malloc(sizeof(*bp));
830 bp->pc = pc;
831 bp->flags = flags;
833 /* keep all GDB-injected breakpoints in front */
834 if (flags & BP_GDB) {
835 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
836 } else {
837 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
840 breakpoint_invalidate(cpu, pc);
842 if (breakpoint) {
843 *breakpoint = bp;
845 return 0;
848 /* Remove a specific breakpoint. */
849 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
851 CPUBreakpoint *bp;
853 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
854 if (bp->pc == pc && bp->flags == flags) {
855 cpu_breakpoint_remove_by_ref(cpu, bp);
856 return 0;
859 return -ENOENT;
862 /* Remove a specific breakpoint by reference. */
863 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
865 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
867 breakpoint_invalidate(cpu, breakpoint->pc);
869 g_free(breakpoint);
872 /* Remove all matching breakpoints. */
873 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
875 CPUBreakpoint *bp, *next;
877 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
878 if (bp->flags & mask) {
879 cpu_breakpoint_remove_by_ref(cpu, bp);
884 /* enable or disable single step mode. EXCP_DEBUG is returned by the
885 CPU loop after each instruction */
886 void cpu_single_step(CPUState *cpu, int enabled)
888 if (cpu->singlestep_enabled != enabled) {
889 cpu->singlestep_enabled = enabled;
890 if (kvm_enabled()) {
891 kvm_update_guest_debug(cpu, 0);
892 } else {
893 /* must flush all the translated code to avoid inconsistencies */
894 /* XXX: only flush what is necessary */
895 tb_flush(cpu);
900 void cpu_abort(CPUState *cpu, const char *fmt, ...)
902 va_list ap;
903 va_list ap2;
905 va_start(ap, fmt);
906 va_copy(ap2, ap);
907 fprintf(stderr, "qemu: fatal: ");
908 vfprintf(stderr, fmt, ap);
909 fprintf(stderr, "\n");
910 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
911 if (qemu_log_separate()) {
912 qemu_log("qemu: fatal: ");
913 qemu_log_vprintf(fmt, ap2);
914 qemu_log("\n");
915 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
916 qemu_log_flush();
917 qemu_log_close();
919 va_end(ap2);
920 va_end(ap);
921 replay_finish();
922 #if defined(CONFIG_USER_ONLY)
924 struct sigaction act;
925 sigfillset(&act.sa_mask);
926 act.sa_handler = SIG_DFL;
927 sigaction(SIGABRT, &act, NULL);
929 #endif
930 abort();
933 #if !defined(CONFIG_USER_ONLY)
934 /* Called from RCU critical section */
935 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
937 RAMBlock *block;
939 block = atomic_rcu_read(&ram_list.mru_block);
940 if (block && addr - block->offset < block->max_length) {
941 return block;
943 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
944 if (addr - block->offset < block->max_length) {
945 goto found;
949 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
950 abort();
952 found:
953 /* It is safe to write mru_block outside the iothread lock. This
954 * is what happens:
956 * mru_block = xxx
957 * rcu_read_unlock()
958 * xxx removed from list
959 * rcu_read_lock()
960 * read mru_block
961 * mru_block = NULL;
962 * call_rcu(reclaim_ramblock, xxx);
963 * rcu_read_unlock()
965 * atomic_rcu_set is not needed here. The block was already published
966 * when it was placed into the list. Here we're just making an extra
967 * copy of the pointer.
969 ram_list.mru_block = block;
970 return block;
973 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
975 CPUState *cpu;
976 ram_addr_t start1;
977 RAMBlock *block;
978 ram_addr_t end;
980 end = TARGET_PAGE_ALIGN(start + length);
981 start &= TARGET_PAGE_MASK;
983 rcu_read_lock();
984 block = qemu_get_ram_block(start);
985 assert(block == qemu_get_ram_block(end - 1));
986 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
987 CPU_FOREACH(cpu) {
988 tlb_reset_dirty(cpu, start1, length);
990 rcu_read_unlock();
993 /* Note: start and end must be within the same ram block. */
994 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
995 ram_addr_t length,
996 unsigned client)
998 DirtyMemoryBlocks *blocks;
999 unsigned long end, page;
1000 bool dirty = false;
1002 if (length == 0) {
1003 return false;
1006 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1007 page = start >> TARGET_PAGE_BITS;
1009 rcu_read_lock();
1011 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1013 while (page < end) {
1014 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1015 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1016 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1018 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1019 offset, num);
1020 page += num;
1023 rcu_read_unlock();
1025 if (dirty && tcg_enabled()) {
1026 tlb_reset_dirty_range_all(start, length);
1029 return dirty;
1032 /* Called from RCU critical section */
1033 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1034 MemoryRegionSection *section,
1035 target_ulong vaddr,
1036 hwaddr paddr, hwaddr xlat,
1037 int prot,
1038 target_ulong *address)
1040 hwaddr iotlb;
1041 CPUWatchpoint *wp;
1043 if (memory_region_is_ram(section->mr)) {
1044 /* Normal RAM. */
1045 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1046 + xlat;
1047 if (!section->readonly) {
1048 iotlb |= PHYS_SECTION_NOTDIRTY;
1049 } else {
1050 iotlb |= PHYS_SECTION_ROM;
1052 } else {
1053 AddressSpaceDispatch *d;
1055 d = atomic_rcu_read(&section->address_space->dispatch);
1056 iotlb = section - d->map.sections;
1057 iotlb += xlat;
1060 /* Make accesses to pages with watchpoints go via the
1061 watchpoint trap routines. */
1062 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1063 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1064 /* Avoid trapping reads of pages with a write breakpoint. */
1065 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1066 iotlb = PHYS_SECTION_WATCH + paddr;
1067 *address |= TLB_MMIO;
1068 break;
1073 return iotlb;
#endif /* !defined(CONFIG_USER_ONLY) */
1077 #if !defined(CONFIG_USER_ONLY)
1079 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1080 uint16_t section);
1081 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1083 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1084 qemu_anon_ram_alloc;
1087 * Set a custom physical guest memory alloator.
1088 * Accelerators with unusual needs may need this. Hopefully, we can
1089 * get rid of it eventually.
1091 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1093 phys_mem_alloc = alloc;
1096 static uint16_t phys_section_add(PhysPageMap *map,
1097 MemoryRegionSection *section)
1099 /* The physical section number is ORed with a page-aligned
1100 * pointer to produce the iotlb entries. Thus it should
1101 * never overflow into the page-aligned value.
1103 assert(map->sections_nb < TARGET_PAGE_SIZE);
1105 if (map->sections_nb == map->sections_nb_alloc) {
1106 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1107 map->sections = g_renew(MemoryRegionSection, map->sections,
1108 map->sections_nb_alloc);
1110 map->sections[map->sections_nb] = *section;
1111 memory_region_ref(section->mr);
1112 return map->sections_nb++;
1115 static void phys_section_destroy(MemoryRegion *mr)
1117 bool have_sub_page = mr->subpage;
1119 memory_region_unref(mr);
1121 if (have_sub_page) {
1122 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1123 object_unref(OBJECT(&subpage->iomem));
1124 g_free(subpage);
1128 static void phys_sections_free(PhysPageMap *map)
1130 while (map->sections_nb > 0) {
1131 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1132 phys_section_destroy(section->mr);
1134 g_free(map->sections);
1135 g_free(map->nodes);
1138 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1140 subpage_t *subpage;
1141 hwaddr base = section->offset_within_address_space
1142 & TARGET_PAGE_MASK;
1143 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1144 d->map.nodes, d->map.sections);
1145 MemoryRegionSection subsection = {
1146 .offset_within_address_space = base,
1147 .size = int128_make64(TARGET_PAGE_SIZE),
1149 hwaddr start, end;
1151 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1153 if (!(existing->mr->subpage)) {
1154 subpage = subpage_init(d->as, base);
1155 subsection.address_space = d->as;
1156 subsection.mr = &subpage->iomem;
1157 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1158 phys_section_add(&d->map, &subsection));
1159 } else {
1160 subpage = container_of(existing->mr, subpage_t, iomem);
1162 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1163 end = start + int128_get64(section->size) - 1;
1164 subpage_register(subpage, start, end,
1165 phys_section_add(&d->map, section));
1169 static void register_multipage(AddressSpaceDispatch *d,
1170 MemoryRegionSection *section)
1172 hwaddr start_addr = section->offset_within_address_space;
1173 uint16_t section_index = phys_section_add(&d->map, section);
1174 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1175 TARGET_PAGE_BITS));
1177 assert(num_pages);
1178 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1181 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1183 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1184 AddressSpaceDispatch *d = as->next_dispatch;
1185 MemoryRegionSection now = *section, remain = *section;
1186 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1188 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1189 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1190 - now.offset_within_address_space;
1192 now.size = int128_min(int128_make64(left), now.size);
1193 register_subpage(d, &now);
1194 } else {
1195 now.size = int128_zero();
1197 while (int128_ne(remain.size, now.size)) {
1198 remain.size = int128_sub(remain.size, now.size);
1199 remain.offset_within_address_space += int128_get64(now.size);
1200 remain.offset_within_region += int128_get64(now.size);
1201 now = remain;
1202 if (int128_lt(remain.size, page_size)) {
1203 register_subpage(d, &now);
1204 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1205 now.size = page_size;
1206 register_subpage(d, &now);
1207 } else {
1208 now.size = int128_and(now.size, int128_neg(page_size));
1209 register_multipage(d, &now);
/* Flush the KVM coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1220 void qemu_mutex_lock_ramlist(void)
1222 qemu_mutex_lock(&ram_list.mutex);
1225 void qemu_mutex_unlock_ramlist(void)
1227 qemu_mutex_unlock(&ram_list.mutex);
1230 #ifdef __linux__
1231 static void *file_ram_alloc(RAMBlock *block,
1232 ram_addr_t memory,
1233 const char *path,
1234 Error **errp)
1236 bool unlink_on_error = false;
1237 char *filename;
1238 char *sanitized_name;
1239 char *c;
1240 void *area;
1241 int fd;
1242 int64_t page_size;
1244 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1245 error_setg(errp,
1246 "host lacks kvm mmu notifiers, -mem-path unsupported");
1247 return NULL;
1250 for (;;) {
1251 fd = open(path, O_RDWR);
1252 if (fd >= 0) {
1253 /* @path names an existing file, use it */
1254 break;
1256 if (errno == ENOENT) {
1257 /* @path names a file that doesn't exist, create it */
1258 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1259 if (fd >= 0) {
1260 unlink_on_error = true;
1261 break;
1263 } else if (errno == EISDIR) {
1264 /* @path names a directory, create a file there */
1265 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1266 sanitized_name = g_strdup(memory_region_name(block->mr));
1267 for (c = sanitized_name; *c != '\0'; c++) {
1268 if (*c == '/') {
1269 *c = '_';
1273 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1274 sanitized_name);
1275 g_free(sanitized_name);
1277 fd = mkstemp(filename);
1278 if (fd >= 0) {
1279 unlink(filename);
1280 g_free(filename);
1281 break;
1283 g_free(filename);
1285 if (errno != EEXIST && errno != EINTR) {
1286 error_setg_errno(errp, errno,
1287 "can't open backing store %s for guest RAM",
1288 path);
1289 goto error;
1292 * Try again on EINTR and EEXIST. The latter happens when
1293 * something else creates the file between our two open().
1297 page_size = qemu_fd_getpagesize(fd);
1298 block->mr->align = page_size;
1300 if (memory < page_size) {
1301 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1302 "or larger than page size 0x%" PRIx64,
1303 memory, page_size);
1304 goto error;
1307 memory = ROUND_UP(memory, page_size);
1310 * ftruncate is not supported by hugetlbfs in older
1311 * hosts, so don't bother bailing out on errors.
1312 * If anything goes wrong with it under other filesystems,
1313 * mmap will fail.
1315 if (ftruncate(fd, memory)) {
1316 perror("ftruncate");
1319 area = qemu_ram_mmap(fd, memory, page_size, block->flags & RAM_SHARED);
1320 if (area == MAP_FAILED) {
1321 error_setg_errno(errp, errno,
1322 "unable to map backing store for guest RAM");
1323 close(fd);
1324 goto error;
1327 if (mem_prealloc) {
1328 os_mem_prealloc(fd, area, memory);
1331 block->fd = fd;
1332 return area;
1334 error:
1335 if (unlink_on_error) {
1336 unlink(path);
1338 close(fd);
1339 return NULL;
1341 #endif
1343 /* Called with the ramlist lock held. */
1344 static ram_addr_t find_ram_offset(ram_addr_t size)
1346 RAMBlock *block, *next_block;
1347 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1349 assert(size != 0); /* it would hand out same offset multiple times */
1351 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1352 return 0;
1355 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1356 ram_addr_t end, next = RAM_ADDR_MAX;
1358 end = block->offset + block->max_length;
1360 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1361 if (next_block->offset >= end) {
1362 next = MIN(next, next_block->offset);
1365 if (next - end >= size && next - end < mingap) {
1366 offset = end;
1367 mingap = next - end;
1371 if (offset == RAM_ADDR_MAX) {
1372 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1373 (uint64_t)size);
1374 abort();
1377 return offset;
1380 ram_addr_t last_ram_offset(void)
1382 RAMBlock *block;
1383 ram_addr_t last = 0;
1385 rcu_read_lock();
1386 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1387 last = MAX(last, block->offset + block->max_length);
1389 rcu_read_unlock();
1390 return last;
1393 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1395 int ret;
1397 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1398 if (!machine_dump_guest_core(current_machine)) {
1399 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1400 if (ret) {
1401 perror("qemu_madvise");
1402 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1403 "but dump_guest_core=off specified\n");
1408 /* Called within an RCU critical section, or while the ramlist lock
1409 * is held.
1411 static RAMBlock *find_ram_block(ram_addr_t addr)
1413 RAMBlock *block;
1415 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1416 if (block->offset == addr) {
1417 return block;
1421 return NULL;
1424 const char *qemu_ram_get_idstr(RAMBlock *rb)
1426 return rb->idstr;
1429 /* Called with iothread lock held. */
1430 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1432 RAMBlock *new_block, *block;
1434 rcu_read_lock();
1435 new_block = find_ram_block(addr);
1436 assert(new_block);
1437 assert(!new_block->idstr[0]);
1439 if (dev) {
1440 char *id = qdev_get_dev_path(dev);
1441 if (id) {
1442 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1443 g_free(id);
1446 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1448 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1449 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1450 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1451 new_block->idstr);
1452 abort();
1455 rcu_read_unlock();
1458 /* Called with iothread lock held. */
1459 void qemu_ram_unset_idstr(ram_addr_t addr)
1461 RAMBlock *block;
1463 /* FIXME: arch_init.c assumes that this is not called throughout
1464 * migration. Ignore the problem since hot-unplug during migration
1465 * does not work anyway.
1468 rcu_read_lock();
1469 block = find_ram_block(addr);
1470 if (block) {
1471 memset(block->idstr, 0, sizeof(block->idstr));
1473 rcu_read_unlock();
1476 static int memory_try_enable_merging(void *addr, size_t len)
1478 if (!machine_mem_merge(current_machine)) {
1479 /* disabled by the user */
1480 return 0;
1483 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1486 /* Only legal before guest might have detected the memory size: e.g. on
1487 * incoming migration, or right after reset.
1489 * As memory core doesn't know how is memory accessed, it is up to
1490 * resize callback to update device state and/or add assertions to detect
1491 * misuse, if necessary.
1493 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1495 RAMBlock *block = find_ram_block(base);
1497 assert(block);
1499 newsize = HOST_PAGE_ALIGN(newsize);
1501 if (block->used_length == newsize) {
1502 return 0;
1505 if (!(block->flags & RAM_RESIZEABLE)) {
1506 error_setg_errno(errp, EINVAL,
1507 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1508 " in != 0x" RAM_ADDR_FMT, block->idstr,
1509 newsize, block->used_length);
1510 return -EINVAL;
1513 if (block->max_length < newsize) {
1514 error_setg_errno(errp, EINVAL,
1515 "Length too large: %s: 0x" RAM_ADDR_FMT
1516 " > 0x" RAM_ADDR_FMT, block->idstr,
1517 newsize, block->max_length);
1518 return -EINVAL;
1521 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1522 block->used_length = newsize;
1523 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1524 DIRTY_CLIENTS_ALL);
1525 memory_region_set_size(block->mr, newsize);
1526 if (block->resized) {
1527 block->resized(block->idstr, newsize, block->host);
1529 return 0;
1532 /* Called with ram_list.mutex held */
1533 static void dirty_memory_extend(ram_addr_t old_ram_size,
1534 ram_addr_t new_ram_size)
1536 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1537 DIRTY_MEMORY_BLOCK_SIZE);
1538 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1539 DIRTY_MEMORY_BLOCK_SIZE);
1540 int i;
1542 /* Only need to extend if block count increased */
1543 if (new_num_blocks <= old_num_blocks) {
1544 return;
1547 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1548 DirtyMemoryBlocks *old_blocks;
1549 DirtyMemoryBlocks *new_blocks;
1550 int j;
1552 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1553 new_blocks = g_malloc(sizeof(*new_blocks) +
1554 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1556 if (old_num_blocks) {
1557 memcpy(new_blocks->blocks, old_blocks->blocks,
1558 old_num_blocks * sizeof(old_blocks->blocks[0]));
1561 for (j = old_num_blocks; j < new_num_blocks; j++) {
1562 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1565 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1567 if (old_blocks) {
1568 g_free_rcu(old_blocks, rcu);
1573 static void ram_block_add(RAMBlock *new_block, Error **errp)
1575 RAMBlock *block;
1576 RAMBlock *last_block = NULL;
1577 ram_addr_t old_ram_size, new_ram_size;
1578 Error *err = NULL;
1580 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1582 qemu_mutex_lock_ramlist();
1583 new_block->offset = find_ram_offset(new_block->max_length);
1585 if (!new_block->host) {
1586 if (xen_enabled()) {
1587 xen_ram_alloc(new_block->offset, new_block->max_length,
1588 new_block->mr, &err);
1589 if (err) {
1590 error_propagate(errp, err);
1591 qemu_mutex_unlock_ramlist();
1593 } else {
1594 new_block->host = phys_mem_alloc(new_block->max_length,
1595 &new_block->mr->align);
1596 if (!new_block->host) {
1597 error_setg_errno(errp, errno,
1598 "cannot set up guest memory '%s'",
1599 memory_region_name(new_block->mr));
1600 qemu_mutex_unlock_ramlist();
1602 memory_try_enable_merging(new_block->host, new_block->max_length);
1606 new_ram_size = MAX(old_ram_size,
1607 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1608 if (new_ram_size > old_ram_size) {
1609 migration_bitmap_extend(old_ram_size, new_ram_size);
1610 dirty_memory_extend(old_ram_size, new_ram_size);
1612 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1613 * QLIST (which has an RCU-friendly variant) does not have insertion at
1614 * tail, so save the last element in last_block.
1616 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1617 last_block = block;
1618 if (block->max_length < new_block->max_length) {
1619 break;
1622 if (block) {
1623 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1624 } else if (last_block) {
1625 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1626 } else { /* list is empty */
1627 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1629 ram_list.mru_block = NULL;
1631 /* Write list before version */
1632 smp_wmb();
1633 ram_list.version++;
1634 qemu_mutex_unlock_ramlist();
1636 cpu_physical_memory_set_dirty_range(new_block->offset,
1637 new_block->used_length,
1638 DIRTY_CLIENTS_ALL);
1640 if (new_block->host) {
1641 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1642 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1643 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1644 if (kvm_enabled()) {
1645 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1650 #ifdef __linux__
1651 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1652 bool share, const char *mem_path,
1653 Error **errp)
1655 RAMBlock *new_block;
1656 Error *local_err = NULL;
1658 if (xen_enabled()) {
1659 error_setg(errp, "-mem-path not supported with Xen");
1660 return NULL;
1663 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1665 * file_ram_alloc() needs to allocate just like
1666 * phys_mem_alloc, but we haven't bothered to provide
1667 * a hook there.
1669 error_setg(errp,
1670 "-mem-path not supported with this accelerator");
1671 return NULL;
1674 size = HOST_PAGE_ALIGN(size);
1675 new_block = g_malloc0(sizeof(*new_block));
1676 new_block->mr = mr;
1677 new_block->used_length = size;
1678 new_block->max_length = size;
1679 new_block->flags = share ? RAM_SHARED : 0;
1680 new_block->host = file_ram_alloc(new_block, size,
1681 mem_path, errp);
1682 if (!new_block->host) {
1683 g_free(new_block);
1684 return NULL;
1687 ram_block_add(new_block, &local_err);
1688 if (local_err) {
1689 g_free(new_block);
1690 error_propagate(errp, local_err);
1691 return NULL;
1693 return new_block;
1695 #endif
1697 static
1698 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1699 void (*resized)(const char*,
1700 uint64_t length,
1701 void *host),
1702 void *host, bool resizeable,
1703 MemoryRegion *mr, Error **errp)
1705 RAMBlock *new_block;
1706 Error *local_err = NULL;
1708 size = HOST_PAGE_ALIGN(size);
1709 max_size = HOST_PAGE_ALIGN(max_size);
1710 new_block = g_malloc0(sizeof(*new_block));
1711 new_block->mr = mr;
1712 new_block->resized = resized;
1713 new_block->used_length = size;
1714 new_block->max_length = max_size;
1715 assert(max_size >= size);
1716 new_block->fd = -1;
1717 new_block->host = host;
1718 if (host) {
1719 new_block->flags |= RAM_PREALLOC;
1721 if (resizeable) {
1722 new_block->flags |= RAM_RESIZEABLE;
1724 ram_block_add(new_block, &local_err);
1725 if (local_err) {
1726 g_free(new_block);
1727 error_propagate(errp, local_err);
1728 return NULL;
1730 return new_block;
1733 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1734 MemoryRegion *mr, Error **errp)
1736 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1739 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1741 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1744 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1745 void (*resized)(const char*,
1746 uint64_t length,
1747 void *host),
1748 MemoryRegion *mr, Error **errp)
1750 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1753 static void reclaim_ramblock(RAMBlock *block)
1755 if (block->flags & RAM_PREALLOC) {
1757 } else if (xen_enabled()) {
1758 xen_invalidate_map_cache_entry(block->host);
1759 #ifndef _WIN32
1760 } else if (block->fd >= 0) {
1761 qemu_ram_munmap(block->host, block->max_length);
1762 close(block->fd);
1763 #endif
1764 } else {
1765 qemu_anon_ram_free(block->host, block->max_length);
1767 g_free(block);
1770 void qemu_ram_free(RAMBlock *block)
1772 qemu_mutex_lock_ramlist();
1773 QLIST_REMOVE_RCU(block, next);
1774 ram_list.mru_block = NULL;
1775 /* Write list before version */
1776 smp_wmb();
1777 ram_list.version++;
1778 call_rcu(block, reclaim_ramblock, rcu);
1779 qemu_mutex_unlock_ramlist();
1782 #ifndef _WIN32
1783 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1785 RAMBlock *block;
1786 ram_addr_t offset;
1787 int flags;
1788 void *area, *vaddr;
1790 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1791 offset = addr - block->offset;
1792 if (offset < block->max_length) {
1793 vaddr = ramblock_ptr(block, offset);
1794 if (block->flags & RAM_PREALLOC) {
1796 } else if (xen_enabled()) {
1797 abort();
1798 } else {
1799 flags = MAP_FIXED;
1800 if (block->fd >= 0) {
1801 flags |= (block->flags & RAM_SHARED ?
1802 MAP_SHARED : MAP_PRIVATE);
1803 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1804 flags, block->fd, offset);
1805 } else {
1807 * Remap needs to match alloc. Accelerators that
1808 * set phys_mem_alloc never remap. If they did,
1809 * we'd need a remap hook here.
1811 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1813 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1814 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1815 flags, -1, 0);
1817 if (area != vaddr) {
1818 fprintf(stderr, "Could not remap addr: "
1819 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1820 length, addr);
1821 exit(1);
1823 memory_try_enable_merging(vaddr, length);
1824 qemu_ram_setup_dump(vaddr, length);
1829 #endif /* !_WIN32 */
1831 int qemu_get_ram_fd(ram_addr_t addr)
1833 RAMBlock *block;
1834 int fd;
1836 rcu_read_lock();
1837 block = qemu_get_ram_block(addr);
1838 fd = block->fd;
1839 rcu_read_unlock();
1840 return fd;
1843 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1845 RAMBlock *block;
1847 rcu_read_lock();
1848 block = qemu_get_ram_block(addr);
1849 block->fd = fd;
1850 rcu_read_unlock();
1853 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1855 RAMBlock *block;
1856 void *ptr;
1858 rcu_read_lock();
1859 block = qemu_get_ram_block(addr);
1860 ptr = ramblock_ptr(block, 0);
1861 rcu_read_unlock();
1862 return ptr;
1865 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1866 * This should not be used for general purpose DMA. Use address_space_map
1867 * or address_space_rw instead. For local memory (e.g. video ram) that the
1868 * device owns, use memory_region_get_ram_ptr.
1870 * Called within RCU critical section.
1872 void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1874 RAMBlock *block = ram_block;
1876 if (block == NULL) {
1877 block = qemu_get_ram_block(addr);
1880 if (xen_enabled() && block->host == NULL) {
1881 /* We need to check if the requested address is in the RAM
1882 * because we don't want to map the entire memory in QEMU.
1883 * In that case just map until the end of the page.
1885 if (block->offset == 0) {
1886 return xen_map_cache(addr, 0, 0);
1889 block->host = xen_map_cache(block->offset, block->max_length, 1);
1891 return ramblock_ptr(block, addr - block->offset);
1894 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1895 * but takes a size argument.
1897 * Called within RCU critical section.
1899 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1900 hwaddr *size)
1902 RAMBlock *block = ram_block;
1903 ram_addr_t offset_inside_block;
1904 if (*size == 0) {
1905 return NULL;
1908 if (block == NULL) {
1909 block = qemu_get_ram_block(addr);
1911 offset_inside_block = addr - block->offset;
1912 *size = MIN(*size, block->max_length - offset_inside_block);
1914 if (xen_enabled() && block->host == NULL) {
1915 /* We need to check if the requested address is in the RAM
1916 * because we don't want to map the entire memory in QEMU.
1917 * In that case just map the requested area.
1919 if (block->offset == 0) {
1920 return xen_map_cache(addr, *size, 1);
1923 block->host = xen_map_cache(block->offset, block->max_length, 1);
1926 return ramblock_ptr(block, offset_inside_block);
1930 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1931 * in that RAMBlock.
1933 * ptr: Host pointer to look up
1934 * round_offset: If true round the result offset down to a page boundary
1935 * *ram_addr: set to result ram_addr
1936 * *offset: set to result offset within the RAMBlock
1938 * Returns: RAMBlock (or NULL if not found)
1940 * By the time this function returns, the returned pointer is not protected
1941 * by RCU anymore. If the caller is not within an RCU critical section and
1942 * does not hold the iothread lock, it must have other means of protecting the
1943 * pointer, such as a reference to the region that includes the incoming
1944 * ram_addr_t.
1946 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1947 ram_addr_t *ram_addr,
1948 ram_addr_t *offset)
1950 RAMBlock *block;
1951 uint8_t *host = ptr;
1953 if (xen_enabled()) {
1954 rcu_read_lock();
1955 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1956 block = qemu_get_ram_block(*ram_addr);
1957 if (block) {
1958 *offset = (host - block->host);
1960 rcu_read_unlock();
1961 return block;
1964 rcu_read_lock();
1965 block = atomic_rcu_read(&ram_list.mru_block);
1966 if (block && block->host && host - block->host < block->max_length) {
1967 goto found;
1970 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1971 /* This case append when the block is not mapped. */
1972 if (block->host == NULL) {
1973 continue;
1975 if (host - block->host < block->max_length) {
1976 goto found;
1980 rcu_read_unlock();
1981 return NULL;
1983 found:
1984 *offset = (host - block->host);
1985 if (round_offset) {
1986 *offset &= TARGET_PAGE_MASK;
1988 *ram_addr = block->offset + *offset;
1989 rcu_read_unlock();
1990 return block;
1994 * Finds the named RAMBlock
1996 * name: The name of RAMBlock to find
1998 * Returns: RAMBlock (or NULL if not found)
2000 RAMBlock *qemu_ram_block_by_name(const char *name)
2002 RAMBlock *block;
2004 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2005 if (!strcmp(name, block->idstr)) {
2006 return block;
2010 return NULL;
2013 /* Some of the softmmu routines need to translate from a host pointer
2014 (typically a TLB entry) back to a ram offset. */
2015 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2017 RAMBlock *block;
2018 ram_addr_t offset; /* Not used */
2020 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2022 if (!block) {
2023 return NULL;
2026 return block->mr;
2029 /* Called within RCU critical section. */
2030 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2031 uint64_t val, unsigned size)
2033 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2034 tb_invalidate_phys_page_fast(ram_addr, size);
2036 switch (size) {
2037 case 1:
2038 stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2039 break;
2040 case 2:
2041 stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2042 break;
2043 case 4:
2044 stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2045 break;
2046 default:
2047 abort();
2049 /* Set both VGA and migration bits for simplicity and to remove
2050 * the notdirty callback faster.
2052 cpu_physical_memory_set_dirty_range(ram_addr, size,
2053 DIRTY_CLIENTS_NOCODE);
2054 /* we remove the notdirty callback only if the code has been
2055 flushed */
2056 if (!cpu_physical_memory_is_clean(ram_addr)) {
2057 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2061 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2062 unsigned size, bool is_write)
2064 return is_write;
2067 static const MemoryRegionOps notdirty_mem_ops = {
2068 .write = notdirty_mem_write,
2069 .valid.accepts = notdirty_mem_accepts,
2070 .endianness = DEVICE_NATIVE_ENDIAN,
2073 /* Generate a debug exception if a watchpoint has been hit. */
2074 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2076 CPUState *cpu = current_cpu;
2077 CPUClass *cc = CPU_GET_CLASS(cpu);
2078 CPUArchState *env = cpu->env_ptr;
2079 target_ulong pc, cs_base;
2080 target_ulong vaddr;
2081 CPUWatchpoint *wp;
2082 int cpu_flags;
2084 if (cpu->watchpoint_hit) {
2085 /* We re-entered the check after replacing the TB. Now raise
2086 * the debug interrupt so that is will trigger after the
2087 * current instruction. */
2088 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2089 return;
2091 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2092 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2093 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2094 && (wp->flags & flags)) {
2095 if (flags == BP_MEM_READ) {
2096 wp->flags |= BP_WATCHPOINT_HIT_READ;
2097 } else {
2098 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2100 wp->hitaddr = vaddr;
2101 wp->hitattrs = attrs;
2102 if (!cpu->watchpoint_hit) {
2103 if (wp->flags & BP_CPU &&
2104 !cc->debug_check_watchpoint(cpu, wp)) {
2105 wp->flags &= ~BP_WATCHPOINT_HIT;
2106 continue;
2108 cpu->watchpoint_hit = wp;
2109 tb_check_watchpoint(cpu);
2110 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2111 cpu->exception_index = EXCP_DEBUG;
2112 cpu_loop_exit(cpu);
2113 } else {
2114 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2115 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2116 cpu_resume_from_signal(cpu, NULL);
2119 } else {
2120 wp->flags &= ~BP_WATCHPOINT_HIT;
2125 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2126 so these check for a hit then pass through to the normal out-of-line
2127 phys routines. */
2128 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2129 unsigned size, MemTxAttrs attrs)
2131 MemTxResult res;
2132 uint64_t data;
2133 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2134 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2136 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2137 switch (size) {
2138 case 1:
2139 data = address_space_ldub(as, addr, attrs, &res);
2140 break;
2141 case 2:
2142 data = address_space_lduw(as, addr, attrs, &res);
2143 break;
2144 case 4:
2145 data = address_space_ldl(as, addr, attrs, &res);
2146 break;
2147 default: abort();
2149 *pdata = data;
2150 return res;
2153 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2154 uint64_t val, unsigned size,
2155 MemTxAttrs attrs)
2157 MemTxResult res;
2158 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2159 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2161 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2162 switch (size) {
2163 case 1:
2164 address_space_stb(as, addr, val, attrs, &res);
2165 break;
2166 case 2:
2167 address_space_stw(as, addr, val, attrs, &res);
2168 break;
2169 case 4:
2170 address_space_stl(as, addr, val, attrs, &res);
2171 break;
2172 default: abort();
2174 return res;
2177 static const MemoryRegionOps watch_mem_ops = {
2178 .read_with_attrs = watch_mem_read,
2179 .write_with_attrs = watch_mem_write,
2180 .endianness = DEVICE_NATIVE_ENDIAN,
2183 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2184 unsigned len, MemTxAttrs attrs)
2186 subpage_t *subpage = opaque;
2187 uint8_t buf[8];
2188 MemTxResult res;
2190 #if defined(DEBUG_SUBPAGE)
2191 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2192 subpage, len, addr);
2193 #endif
2194 res = address_space_read(subpage->as, addr + subpage->base,
2195 attrs, buf, len);
2196 if (res) {
2197 return res;
2199 switch (len) {
2200 case 1:
2201 *data = ldub_p(buf);
2202 return MEMTX_OK;
2203 case 2:
2204 *data = lduw_p(buf);
2205 return MEMTX_OK;
2206 case 4:
2207 *data = ldl_p(buf);
2208 return MEMTX_OK;
2209 case 8:
2210 *data = ldq_p(buf);
2211 return MEMTX_OK;
2212 default:
2213 abort();
2217 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2218 uint64_t value, unsigned len, MemTxAttrs attrs)
2220 subpage_t *subpage = opaque;
2221 uint8_t buf[8];
2223 #if defined(DEBUG_SUBPAGE)
2224 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2225 " value %"PRIx64"\n",
2226 __func__, subpage, len, addr, value);
2227 #endif
2228 switch (len) {
2229 case 1:
2230 stb_p(buf, value);
2231 break;
2232 case 2:
2233 stw_p(buf, value);
2234 break;
2235 case 4:
2236 stl_p(buf, value);
2237 break;
2238 case 8:
2239 stq_p(buf, value);
2240 break;
2241 default:
2242 abort();
2244 return address_space_write(subpage->as, addr + subpage->base,
2245 attrs, buf, len);
2248 static bool subpage_accepts(void *opaque, hwaddr addr,
2249 unsigned len, bool is_write)
2251 subpage_t *subpage = opaque;
2252 #if defined(DEBUG_SUBPAGE)
2253 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2254 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2255 #endif
2257 return address_space_access_valid(subpage->as, addr + subpage->base,
2258 len, is_write);
2261 static const MemoryRegionOps subpage_ops = {
2262 .read_with_attrs = subpage_read,
2263 .write_with_attrs = subpage_write,
2264 .impl.min_access_size = 1,
2265 .impl.max_access_size = 8,
2266 .valid.min_access_size = 1,
2267 .valid.max_access_size = 8,
2268 .valid.accepts = subpage_accepts,
2269 .endianness = DEVICE_NATIVE_ENDIAN,
2272 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2273 uint16_t section)
2275 int idx, eidx;
2277 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2278 return -1;
2279 idx = SUBPAGE_IDX(start);
2280 eidx = SUBPAGE_IDX(end);
2281 #if defined(DEBUG_SUBPAGE)
2282 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2283 __func__, mmio, start, end, idx, eidx, section);
2284 #endif
2285 for (; idx <= eidx; idx++) {
2286 mmio->sub_section[idx] = section;
2289 return 0;
2292 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2294 subpage_t *mmio;
2296 mmio = g_malloc0(sizeof(subpage_t));
2298 mmio->as = as;
2299 mmio->base = base;
2300 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2301 NULL, TARGET_PAGE_SIZE);
2302 mmio->iomem.subpage = true;
2303 #if defined(DEBUG_SUBPAGE)
2304 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2305 mmio, base, TARGET_PAGE_SIZE);
2306 #endif
2307 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2309 return mmio;
2312 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2313 MemoryRegion *mr)
2315 assert(as);
2316 MemoryRegionSection section = {
2317 .address_space = as,
2318 .mr = mr,
2319 .offset_within_address_space = 0,
2320 .offset_within_region = 0,
2321 .size = int128_2_64(),
2324 return phys_section_add(map, &section);
2327 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2329 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2330 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2331 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2332 MemoryRegionSection *sections = d->map.sections;
2334 return sections[index & ~TARGET_PAGE_MASK].mr;
2337 static void io_mem_init(void)
2339 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2340 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2341 NULL, UINT64_MAX);
2342 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2343 NULL, UINT64_MAX);
2344 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2345 NULL, UINT64_MAX);
2348 static void mem_begin(MemoryListener *listener)
2350 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2351 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2352 uint16_t n;
2354 n = dummy_section(&d->map, as, &io_mem_unassigned);
2355 assert(n == PHYS_SECTION_UNASSIGNED);
2356 n = dummy_section(&d->map, as, &io_mem_notdirty);
2357 assert(n == PHYS_SECTION_NOTDIRTY);
2358 n = dummy_section(&d->map, as, &io_mem_rom);
2359 assert(n == PHYS_SECTION_ROM);
2360 n = dummy_section(&d->map, as, &io_mem_watch);
2361 assert(n == PHYS_SECTION_WATCH);
2363 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2364 d->as = as;
2365 as->next_dispatch = d;
2368 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2370 phys_sections_free(&d->map);
2371 g_free(d);
2374 static void mem_commit(MemoryListener *listener)
2376 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2377 AddressSpaceDispatch *cur = as->dispatch;
2378 AddressSpaceDispatch *next = as->next_dispatch;
2380 phys_page_compact_all(next, next->map.nodes_nb);
2382 atomic_rcu_set(&as->dispatch, next);
2383 if (cur) {
2384 call_rcu(cur, address_space_dispatch_free, rcu);
2388 static void tcg_commit(MemoryListener *listener)
2390 CPUAddressSpace *cpuas;
2391 AddressSpaceDispatch *d;
2393 /* since each CPU stores ram addresses in its TLB cache, we must
2394 reset the modified entries */
2395 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2396 cpu_reloading_memory_map();
2397 /* The CPU and TLB are protected by the iothread lock.
2398 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2399 * may have split the RCU critical section.
2401 d = atomic_rcu_read(&cpuas->as->dispatch);
2402 cpuas->memory_dispatch = d;
2403 tlb_flush(cpuas->cpu, 1);
2406 void address_space_init_dispatch(AddressSpace *as)
2408 as->dispatch = NULL;
2409 as->dispatch_listener = (MemoryListener) {
2410 .begin = mem_begin,
2411 .commit = mem_commit,
2412 .region_add = mem_add,
2413 .region_nop = mem_add,
2414 .priority = 0,
2416 memory_listener_register(&as->dispatch_listener, as);
2419 void address_space_unregister(AddressSpace *as)
2421 memory_listener_unregister(&as->dispatch_listener);
2424 void address_space_destroy_dispatch(AddressSpace *as)
2426 AddressSpaceDispatch *d = as->dispatch;
2428 atomic_rcu_set(&as->dispatch, NULL);
2429 if (d) {
2430 call_rcu(d, address_space_dispatch_free, rcu);
2434 static void memory_map_init(void)
2436 system_memory = g_malloc(sizeof(*system_memory));
2438 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2439 address_space_init(&address_space_memory, system_memory, "memory");
2441 system_io = g_malloc(sizeof(*system_io));
2442 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2443 65536);
2444 address_space_init(&address_space_io, system_io, "I/O");
2447 MemoryRegion *get_system_memory(void)
2449 return system_memory;
2452 MemoryRegion *get_system_io(void)
2454 return system_io;
2457 #endif /* !defined(CONFIG_USER_ONLY) */
2459 /* physical memory access (slow version, mainly for debug) */
2460 #if defined(CONFIG_USER_ONLY)
2461 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2462 uint8_t *buf, int len, int is_write)
2464 int l, flags;
2465 target_ulong page;
2466 void * p;
2468 while (len > 0) {
2469 page = addr & TARGET_PAGE_MASK;
2470 l = (page + TARGET_PAGE_SIZE) - addr;
2471 if (l > len)
2472 l = len;
2473 flags = page_get_flags(page);
2474 if (!(flags & PAGE_VALID))
2475 return -1;
2476 if (is_write) {
2477 if (!(flags & PAGE_WRITE))
2478 return -1;
2479 /* XXX: this code should not depend on lock_user */
2480 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2481 return -1;
2482 memcpy(p, buf, l);
2483 unlock_user(p, addr, l);
2484 } else {
2485 if (!(flags & PAGE_READ))
2486 return -1;
2487 /* XXX: this code should not depend on lock_user */
2488 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2489 return -1;
2490 memcpy(buf, p, l);
2491 unlock_user(p, addr, 0);
2493 len -= l;
2494 buf += l;
2495 addr += l;
2497 return 0;
2500 #else
2502 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2503 hwaddr length)
2505 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2506 /* No early return if dirty_log_mask is or becomes 0, because
2507 * cpu_physical_memory_set_dirty_range will still call
2508 * xen_modified_memory.
2510 if (dirty_log_mask) {
2511 dirty_log_mask =
2512 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2514 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2515 tb_invalidate_phys_range(addr, addr + length);
2516 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2518 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2521 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2523 unsigned access_size_max = mr->ops->valid.max_access_size;
2525 /* Regions are assumed to support 1-4 byte accesses unless
2526 otherwise specified. */
2527 if (access_size_max == 0) {
2528 access_size_max = 4;
2531 /* Bound the maximum access by the alignment of the address. */
2532 if (!mr->ops->impl.unaligned) {
2533 unsigned align_size_max = addr & -addr;
2534 if (align_size_max != 0 && align_size_max < access_size_max) {
2535 access_size_max = align_size_max;
2539 /* Don't attempt accesses larger than the maximum. */
2540 if (l > access_size_max) {
2541 l = access_size_max;
2543 l = pow2floor(l);
2545 return l;
2548 static bool prepare_mmio_access(MemoryRegion *mr)
2550 bool unlocked = !qemu_mutex_iothread_locked();
2551 bool release_lock = false;
2553 if (unlocked && mr->global_locking) {
2554 qemu_mutex_lock_iothread();
2555 unlocked = false;
2556 release_lock = true;
2558 if (mr->flush_coalesced_mmio) {
2559 if (unlocked) {
2560 qemu_mutex_lock_iothread();
2562 qemu_flush_coalesced_mmio_buffer();
2563 if (unlocked) {
2564 qemu_mutex_unlock_iothread();
2568 return release_lock;
2571 /* Called within RCU critical section. */
2572 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2573 MemTxAttrs attrs,
2574 const uint8_t *buf,
2575 int len, hwaddr addr1,
2576 hwaddr l, MemoryRegion *mr)
2578 uint8_t *ptr;
2579 uint64_t val;
2580 MemTxResult result = MEMTX_OK;
2581 bool release_lock = false;
2583 for (;;) {
2584 if (!memory_access_is_direct(mr, true)) {
2585 release_lock |= prepare_mmio_access(mr);
2586 l = memory_access_size(mr, l, addr1);
2587 /* XXX: could force current_cpu to NULL to avoid
2588 potential bugs */
2589 switch (l) {
2590 case 8:
2591 /* 64 bit write access */
2592 val = ldq_p(buf);
2593 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2594 attrs);
2595 break;
2596 case 4:
2597 /* 32 bit write access */
2598 val = ldl_p(buf);
2599 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2600 attrs);
2601 break;
2602 case 2:
2603 /* 16 bit write access */
2604 val = lduw_p(buf);
2605 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2606 attrs);
2607 break;
2608 case 1:
2609 /* 8 bit write access */
2610 val = ldub_p(buf);
2611 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2612 attrs);
2613 break;
2614 default:
2615 abort();
2617 } else {
2618 addr1 += memory_region_get_ram_addr(mr);
2619 /* RAM case */
2620 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2621 memcpy(ptr, buf, l);
2622 invalidate_and_set_dirty(mr, addr1, l);
2625 if (release_lock) {
2626 qemu_mutex_unlock_iothread();
2627 release_lock = false;
2630 len -= l;
2631 buf += l;
2632 addr += l;
2634 if (!len) {
2635 break;
2638 l = len;
2639 mr = address_space_translate(as, addr, &addr1, &l, true);
2642 return result;
2645 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2646 const uint8_t *buf, int len)
2648 hwaddr l;
2649 hwaddr addr1;
2650 MemoryRegion *mr;
2651 MemTxResult result = MEMTX_OK;
2653 if (len > 0) {
2654 rcu_read_lock();
2655 l = len;
2656 mr = address_space_translate(as, addr, &addr1, &l, true);
2657 result = address_space_write_continue(as, addr, attrs, buf, len,
2658 addr1, l, mr);
2659 rcu_read_unlock();
2662 return result;
2665 /* Called within RCU critical section. */
2666 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2667 MemTxAttrs attrs, uint8_t *buf,
2668 int len, hwaddr addr1, hwaddr l,
2669 MemoryRegion *mr)
2671 uint8_t *ptr;
2672 uint64_t val;
2673 MemTxResult result = MEMTX_OK;
2674 bool release_lock = false;
2676 for (;;) {
2677 if (!memory_access_is_direct(mr, false)) {
2678 /* I/O case */
2679 release_lock |= prepare_mmio_access(mr);
2680 l = memory_access_size(mr, l, addr1);
2681 switch (l) {
2682 case 8:
2683 /* 64 bit read access */
2684 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2685 attrs);
2686 stq_p(buf, val);
2687 break;
2688 case 4:
2689 /* 32 bit read access */
2690 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2691 attrs);
2692 stl_p(buf, val);
2693 break;
2694 case 2:
2695 /* 16 bit read access */
2696 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2697 attrs);
2698 stw_p(buf, val);
2699 break;
2700 case 1:
2701 /* 8 bit read access */
2702 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2703 attrs);
2704 stb_p(buf, val);
2705 break;
2706 default:
2707 abort();
2709 } else {
2710 /* RAM case */
2711 ptr = qemu_get_ram_ptr(mr->ram_block,
2712 memory_region_get_ram_addr(mr) + addr1);
2713 memcpy(buf, ptr, l);
2716 if (release_lock) {
2717 qemu_mutex_unlock_iothread();
2718 release_lock = false;
2721 len -= l;
2722 buf += l;
2723 addr += l;
2725 if (!len) {
2726 break;
2729 l = len;
2730 mr = address_space_translate(as, addr, &addr1, &l, false);
2733 return result;
2736 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2737 MemTxAttrs attrs, uint8_t *buf, int len)
2739 hwaddr l;
2740 hwaddr addr1;
2741 MemoryRegion *mr;
2742 MemTxResult result = MEMTX_OK;
2744 if (len > 0) {
2745 rcu_read_lock();
2746 l = len;
2747 mr = address_space_translate(as, addr, &addr1, &l, false);
2748 result = address_space_read_continue(as, addr, attrs, buf, len,
2749 addr1, l, mr);
2750 rcu_read_unlock();
2753 return result;
2756 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2757 uint8_t *buf, int len, bool is_write)
2759 if (is_write) {
2760 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2761 } else {
2762 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2766 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2767 int len, int is_write)
2769 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2770 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into ROM/RAM */
    FLUSH_CACHE,    /* call flush_icache_range() on the host range */
};
2778 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2779 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2781 hwaddr l;
2782 uint8_t *ptr;
2783 hwaddr addr1;
2784 MemoryRegion *mr;
2786 rcu_read_lock();
2787 while (len > 0) {
2788 l = len;
2789 mr = address_space_translate(as, addr, &addr1, &l, true);
2791 if (!(memory_region_is_ram(mr) ||
2792 memory_region_is_romd(mr))) {
2793 l = memory_access_size(mr, l, addr1);
2794 } else {
2795 addr1 += memory_region_get_ram_addr(mr);
2796 /* ROM/RAM case */
2797 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2798 switch (type) {
2799 case WRITE_DATA:
2800 memcpy(ptr, buf, l);
2801 invalidate_and_set_dirty(mr, addr1, l);
2802 break;
2803 case FLUSH_CACHE:
2804 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2805 break;
2808 len -= l;
2809 buf += l;
2810 addr += l;
2812 rcu_read_unlock();
2815 /* used for ROM loading : can write in RAM and ROM */
2816 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2817 const uint8_t *buf, int len)
2819 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2822 void cpu_flush_icache_range(hwaddr start, int len)
2825 * This function should do the same thing as an icache flush that was
2826 * triggered from within the guest. For TCG we are always cache coherent,
2827 * so there is no need to flush anything. For KVM / Xen we need to flush
2828 * the host's instruction cache at least.
2830 if (tcg_enabled()) {
2831 return;
2834 cpu_physical_memory_write_rom_internal(&address_space_memory,
2835 start, NULL, len, FLUSH_CACHE);
2838 typedef struct {
2839 MemoryRegion *mr;
2840 void *buffer;
2841 hwaddr addr;
2842 hwaddr len;
2843 bool in_use;
2844 } BounceBuffer;
2846 static BounceBuffer bounce;
2848 typedef struct MapClient {
2849 QEMUBH *bh;
2850 QLIST_ENTRY(MapClient) link;
2851 } MapClient;
2853 QemuMutex map_client_list_lock;
2854 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2855 = QLIST_HEAD_INITIALIZER(map_client_list);
2857 static void cpu_unregister_map_client_do(MapClient *client)
2859 QLIST_REMOVE(client, link);
2860 g_free(client);
2863 static void cpu_notify_map_clients_locked(void)
2865 MapClient *client;
2867 while (!QLIST_EMPTY(&map_client_list)) {
2868 client = QLIST_FIRST(&map_client_list);
2869 qemu_bh_schedule(client->bh);
2870 cpu_unregister_map_client_do(client);
2874 void cpu_register_map_client(QEMUBH *bh)
2876 MapClient *client = g_malloc(sizeof(*client));
2878 qemu_mutex_lock(&map_client_list_lock);
2879 client->bh = bh;
2880 QLIST_INSERT_HEAD(&map_client_list, client, link);
2881 if (!atomic_read(&bounce.in_use)) {
2882 cpu_notify_map_clients_locked();
2884 qemu_mutex_unlock(&map_client_list_lock);
2887 void cpu_exec_init_all(void)
2889 qemu_mutex_init(&ram_list.mutex);
2890 io_mem_init();
2891 memory_map_init();
2892 qemu_mutex_init(&map_client_list_lock);
2895 void cpu_unregister_map_client(QEMUBH *bh)
2897 MapClient *client;
2899 qemu_mutex_lock(&map_client_list_lock);
2900 QLIST_FOREACH(client, &map_client_list, link) {
2901 if (client->bh == bh) {
2902 cpu_unregister_map_client_do(client);
2903 break;
2906 qemu_mutex_unlock(&map_client_list_lock);
2909 static void cpu_notify_map_clients(void)
2911 qemu_mutex_lock(&map_client_list_lock);
2912 cpu_notify_map_clients_locked();
2913 qemu_mutex_unlock(&map_client_list_lock);
2916 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2918 MemoryRegion *mr;
2919 hwaddr l, xlat;
2921 rcu_read_lock();
2922 while (len > 0) {
2923 l = len;
2924 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2925 if (!memory_access_is_direct(mr, is_write)) {
2926 l = memory_access_size(mr, l, addr);
2927 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2928 return false;
2932 len -= l;
2933 addr += l;
2935 rcu_read_unlock();
2936 return true;
2939 /* Map a physical memory region into a host virtual address.
2940 * May map a subset of the requested range, given by and returned in *plen.
2941 * May return NULL if resources needed to perform the mapping are exhausted.
2942 * Use only for reads OR writes - not for read-modify-write operations.
2943 * Use cpu_register_map_client() to know when retrying the map operation is
2944 * likely to succeed.
2946 void *address_space_map(AddressSpace *as,
2947 hwaddr addr,
2948 hwaddr *plen,
2949 bool is_write)
2951 hwaddr len = *plen;
2952 hwaddr done = 0;
2953 hwaddr l, xlat, base;
2954 MemoryRegion *mr, *this_mr;
2955 ram_addr_t raddr;
2956 void *ptr;
2958 if (len == 0) {
2959 return NULL;
2962 l = len;
2963 rcu_read_lock();
2964 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2966 if (!memory_access_is_direct(mr, is_write)) {
2967 if (atomic_xchg(&bounce.in_use, true)) {
2968 rcu_read_unlock();
2969 return NULL;
2971 /* Avoid unbounded allocations */
2972 l = MIN(l, TARGET_PAGE_SIZE);
2973 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2974 bounce.addr = addr;
2975 bounce.len = l;
2977 memory_region_ref(mr);
2978 bounce.mr = mr;
2979 if (!is_write) {
2980 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2981 bounce.buffer, l);
2984 rcu_read_unlock();
2985 *plen = l;
2986 return bounce.buffer;
2989 base = xlat;
2990 raddr = memory_region_get_ram_addr(mr);
2992 for (;;) {
2993 len -= l;
2994 addr += l;
2995 done += l;
2996 if (len == 0) {
2997 break;
3000 l = len;
3001 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3002 if (this_mr != mr || xlat != base + done) {
3003 break;
3007 memory_region_ref(mr);
3008 *plen = done;
3009 ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3010 rcu_read_unlock();
3012 return ptr;
3015 /* Unmaps a memory region previously mapped by address_space_map().
3016 * Will also mark the memory as dirty if is_write == 1. access_len gives
3017 * the amount of memory that was actually read or written by the caller.
3019 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3020 int is_write, hwaddr access_len)
3022 if (buffer != bounce.buffer) {
3023 MemoryRegion *mr;
3024 ram_addr_t addr1;
3026 mr = qemu_ram_addr_from_host(buffer, &addr1);
3027 assert(mr != NULL);
3028 if (is_write) {
3029 invalidate_and_set_dirty(mr, addr1, access_len);
3031 if (xen_enabled()) {
3032 xen_invalidate_map_cache_entry(buffer);
3034 memory_region_unref(mr);
3035 return;
3037 if (is_write) {
3038 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3039 bounce.buffer, access_len);
3041 qemu_vfree(bounce.buffer);
3042 bounce.buffer = NULL;
3043 memory_region_unref(bounce.mr);
3044 atomic_mb_set(&bounce.in_use, false);
3045 cpu_notify_map_clients();
3048 void *cpu_physical_memory_map(hwaddr addr,
3049 hwaddr *plen,
3050 int is_write)
3052 return address_space_map(&address_space_memory, addr, plen, is_write);
3055 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3056 int is_write, hwaddr access_len)
3058 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3061 /* warning: addr must be aligned */
3062 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3063 MemTxAttrs attrs,
3064 MemTxResult *result,
3065 enum device_endian endian)
3067 uint8_t *ptr;
3068 uint64_t val;
3069 MemoryRegion *mr;
3070 hwaddr l = 4;
3071 hwaddr addr1;
3072 MemTxResult r;
3073 bool release_lock = false;
3075 rcu_read_lock();
3076 mr = address_space_translate(as, addr, &addr1, &l, false);
3077 if (l < 4 || !memory_access_is_direct(mr, false)) {
3078 release_lock |= prepare_mmio_access(mr);
3080 /* I/O case */
3081 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3082 #if defined(TARGET_WORDS_BIGENDIAN)
3083 if (endian == DEVICE_LITTLE_ENDIAN) {
3084 val = bswap32(val);
3086 #else
3087 if (endian == DEVICE_BIG_ENDIAN) {
3088 val = bswap32(val);
3090 #endif
3091 } else {
3092 /* RAM case */
3093 ptr = qemu_get_ram_ptr(mr->ram_block,
3094 (memory_region_get_ram_addr(mr)
3095 & TARGET_PAGE_MASK)
3096 + addr1);
3097 switch (endian) {
3098 case DEVICE_LITTLE_ENDIAN:
3099 val = ldl_le_p(ptr);
3100 break;
3101 case DEVICE_BIG_ENDIAN:
3102 val = ldl_be_p(ptr);
3103 break;
3104 default:
3105 val = ldl_p(ptr);
3106 break;
3108 r = MEMTX_OK;
3110 if (result) {
3111 *result = r;
3113 if (release_lock) {
3114 qemu_mutex_unlock_iothread();
3116 rcu_read_unlock();
3117 return val;
3120 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3121 MemTxAttrs attrs, MemTxResult *result)
3123 return address_space_ldl_internal(as, addr, attrs, result,
3124 DEVICE_NATIVE_ENDIAN);
3127 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3128 MemTxAttrs attrs, MemTxResult *result)
3130 return address_space_ldl_internal(as, addr, attrs, result,
3131 DEVICE_LITTLE_ENDIAN);
3134 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3135 MemTxAttrs attrs, MemTxResult *result)
3137 return address_space_ldl_internal(as, addr, attrs, result,
3138 DEVICE_BIG_ENDIAN);
3141 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3143 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3146 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3148 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3151 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3153 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3156 /* warning: addr must be aligned */
3157 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3158 MemTxAttrs attrs,
3159 MemTxResult *result,
3160 enum device_endian endian)
3162 uint8_t *ptr;
3163 uint64_t val;
3164 MemoryRegion *mr;
3165 hwaddr l = 8;
3166 hwaddr addr1;
3167 MemTxResult r;
3168 bool release_lock = false;
3170 rcu_read_lock();
3171 mr = address_space_translate(as, addr, &addr1, &l,
3172 false);
3173 if (l < 8 || !memory_access_is_direct(mr, false)) {
3174 release_lock |= prepare_mmio_access(mr);
3176 /* I/O case */
3177 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3178 #if defined(TARGET_WORDS_BIGENDIAN)
3179 if (endian == DEVICE_LITTLE_ENDIAN) {
3180 val = bswap64(val);
3182 #else
3183 if (endian == DEVICE_BIG_ENDIAN) {
3184 val = bswap64(val);
3186 #endif
3187 } else {
3188 /* RAM case */
3189 ptr = qemu_get_ram_ptr(mr->ram_block,
3190 (memory_region_get_ram_addr(mr)
3191 & TARGET_PAGE_MASK)
3192 + addr1);
3193 switch (endian) {
3194 case DEVICE_LITTLE_ENDIAN:
3195 val = ldq_le_p(ptr);
3196 break;
3197 case DEVICE_BIG_ENDIAN:
3198 val = ldq_be_p(ptr);
3199 break;
3200 default:
3201 val = ldq_p(ptr);
3202 break;
3204 r = MEMTX_OK;
3206 if (result) {
3207 *result = r;
3209 if (release_lock) {
3210 qemu_mutex_unlock_iothread();
3212 rcu_read_unlock();
3213 return val;
3216 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3217 MemTxAttrs attrs, MemTxResult *result)
3219 return address_space_ldq_internal(as, addr, attrs, result,
3220 DEVICE_NATIVE_ENDIAN);
3223 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3224 MemTxAttrs attrs, MemTxResult *result)
3226 return address_space_ldq_internal(as, addr, attrs, result,
3227 DEVICE_LITTLE_ENDIAN);
3230 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3231 MemTxAttrs attrs, MemTxResult *result)
3233 return address_space_ldq_internal(as, addr, attrs, result,
3234 DEVICE_BIG_ENDIAN);
3237 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3239 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3242 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3244 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3247 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3249 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3252 /* XXX: optimize */
3253 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3254 MemTxAttrs attrs, MemTxResult *result)
3256 uint8_t val;
3257 MemTxResult r;
3259 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3260 if (result) {
3261 *result = r;
3263 return val;
3266 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3268 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3271 /* warning: addr must be aligned */
3272 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3273 hwaddr addr,
3274 MemTxAttrs attrs,
3275 MemTxResult *result,
3276 enum device_endian endian)
3278 uint8_t *ptr;
3279 uint64_t val;
3280 MemoryRegion *mr;
3281 hwaddr l = 2;
3282 hwaddr addr1;
3283 MemTxResult r;
3284 bool release_lock = false;
3286 rcu_read_lock();
3287 mr = address_space_translate(as, addr, &addr1, &l,
3288 false);
3289 if (l < 2 || !memory_access_is_direct(mr, false)) {
3290 release_lock |= prepare_mmio_access(mr);
3292 /* I/O case */
3293 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3294 #if defined(TARGET_WORDS_BIGENDIAN)
3295 if (endian == DEVICE_LITTLE_ENDIAN) {
3296 val = bswap16(val);
3298 #else
3299 if (endian == DEVICE_BIG_ENDIAN) {
3300 val = bswap16(val);
3302 #endif
3303 } else {
3304 /* RAM case */
3305 ptr = qemu_get_ram_ptr(mr->ram_block,
3306 (memory_region_get_ram_addr(mr)
3307 & TARGET_PAGE_MASK)
3308 + addr1);
3309 switch (endian) {
3310 case DEVICE_LITTLE_ENDIAN:
3311 val = lduw_le_p(ptr);
3312 break;
3313 case DEVICE_BIG_ENDIAN:
3314 val = lduw_be_p(ptr);
3315 break;
3316 default:
3317 val = lduw_p(ptr);
3318 break;
3320 r = MEMTX_OK;
3322 if (result) {
3323 *result = r;
3325 if (release_lock) {
3326 qemu_mutex_unlock_iothread();
3328 rcu_read_unlock();
3329 return val;
3332 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3333 MemTxAttrs attrs, MemTxResult *result)
3335 return address_space_lduw_internal(as, addr, attrs, result,
3336 DEVICE_NATIVE_ENDIAN);
3339 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3340 MemTxAttrs attrs, MemTxResult *result)
3342 return address_space_lduw_internal(as, addr, attrs, result,
3343 DEVICE_LITTLE_ENDIAN);
3346 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3347 MemTxAttrs attrs, MemTxResult *result)
3349 return address_space_lduw_internal(as, addr, attrs, result,
3350 DEVICE_BIG_ENDIAN);
3353 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3355 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3358 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3360 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3363 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3365 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3368 /* warning: addr must be aligned. The ram page is not masked as dirty
3369 and the code inside is not invalidated. It is useful if the dirty
3370 bits are used to track modified PTEs */
3371 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3372 MemTxAttrs attrs, MemTxResult *result)
3374 uint8_t *ptr;
3375 MemoryRegion *mr;
3376 hwaddr l = 4;
3377 hwaddr addr1;
3378 MemTxResult r;
3379 uint8_t dirty_log_mask;
3380 bool release_lock = false;
3382 rcu_read_lock();
3383 mr = address_space_translate(as, addr, &addr1, &l,
3384 true);
3385 if (l < 4 || !memory_access_is_direct(mr, true)) {
3386 release_lock |= prepare_mmio_access(mr);
3388 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3389 } else {
3390 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3391 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3392 stl_p(ptr, val);
3394 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3395 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3396 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3397 r = MEMTX_OK;
3399 if (result) {
3400 *result = r;
3402 if (release_lock) {
3403 qemu_mutex_unlock_iothread();
3405 rcu_read_unlock();
3408 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3410 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3413 /* warning: addr must be aligned */
3414 static inline void address_space_stl_internal(AddressSpace *as,
3415 hwaddr addr, uint32_t val,
3416 MemTxAttrs attrs,
3417 MemTxResult *result,
3418 enum device_endian endian)
3420 uint8_t *ptr;
3421 MemoryRegion *mr;
3422 hwaddr l = 4;
3423 hwaddr addr1;
3424 MemTxResult r;
3425 bool release_lock = false;
3427 rcu_read_lock();
3428 mr = address_space_translate(as, addr, &addr1, &l,
3429 true);
3430 if (l < 4 || !memory_access_is_direct(mr, true)) {
3431 release_lock |= prepare_mmio_access(mr);
3433 #if defined(TARGET_WORDS_BIGENDIAN)
3434 if (endian == DEVICE_LITTLE_ENDIAN) {
3435 val = bswap32(val);
3437 #else
3438 if (endian == DEVICE_BIG_ENDIAN) {
3439 val = bswap32(val);
3441 #endif
3442 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3443 } else {
3444 /* RAM case */
3445 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3446 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3447 switch (endian) {
3448 case DEVICE_LITTLE_ENDIAN:
3449 stl_le_p(ptr, val);
3450 break;
3451 case DEVICE_BIG_ENDIAN:
3452 stl_be_p(ptr, val);
3453 break;
3454 default:
3455 stl_p(ptr, val);
3456 break;
3458 invalidate_and_set_dirty(mr, addr1, 4);
3459 r = MEMTX_OK;
3461 if (result) {
3462 *result = r;
3464 if (release_lock) {
3465 qemu_mutex_unlock_iothread();
3467 rcu_read_unlock();
3470 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3471 MemTxAttrs attrs, MemTxResult *result)
3473 address_space_stl_internal(as, addr, val, attrs, result,
3474 DEVICE_NATIVE_ENDIAN);
3477 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3478 MemTxAttrs attrs, MemTxResult *result)
3480 address_space_stl_internal(as, addr, val, attrs, result,
3481 DEVICE_LITTLE_ENDIAN);
3484 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3485 MemTxAttrs attrs, MemTxResult *result)
3487 address_space_stl_internal(as, addr, val, attrs, result,
3488 DEVICE_BIG_ENDIAN);
3491 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3493 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3496 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3498 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3501 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3503 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3506 /* XXX: optimize */
3507 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3508 MemTxAttrs attrs, MemTxResult *result)
3510 uint8_t v = val;
3511 MemTxResult r;
3513 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3514 if (result) {
3515 *result = r;
3519 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3521 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3524 /* warning: addr must be aligned */
3525 static inline void address_space_stw_internal(AddressSpace *as,
3526 hwaddr addr, uint32_t val,
3527 MemTxAttrs attrs,
3528 MemTxResult *result,
3529 enum device_endian endian)
3531 uint8_t *ptr;
3532 MemoryRegion *mr;
3533 hwaddr l = 2;
3534 hwaddr addr1;
3535 MemTxResult r;
3536 bool release_lock = false;
3538 rcu_read_lock();
3539 mr = address_space_translate(as, addr, &addr1, &l, true);
3540 if (l < 2 || !memory_access_is_direct(mr, true)) {
3541 release_lock |= prepare_mmio_access(mr);
3543 #if defined(TARGET_WORDS_BIGENDIAN)
3544 if (endian == DEVICE_LITTLE_ENDIAN) {
3545 val = bswap16(val);
3547 #else
3548 if (endian == DEVICE_BIG_ENDIAN) {
3549 val = bswap16(val);
3551 #endif
3552 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3553 } else {
3554 /* RAM case */
3555 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3556 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3557 switch (endian) {
3558 case DEVICE_LITTLE_ENDIAN:
3559 stw_le_p(ptr, val);
3560 break;
3561 case DEVICE_BIG_ENDIAN:
3562 stw_be_p(ptr, val);
3563 break;
3564 default:
3565 stw_p(ptr, val);
3566 break;
3568 invalidate_and_set_dirty(mr, addr1, 2);
3569 r = MEMTX_OK;
3571 if (result) {
3572 *result = r;
3574 if (release_lock) {
3575 qemu_mutex_unlock_iothread();
3577 rcu_read_unlock();
3580 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3581 MemTxAttrs attrs, MemTxResult *result)
3583 address_space_stw_internal(as, addr, val, attrs, result,
3584 DEVICE_NATIVE_ENDIAN);
3587 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3588 MemTxAttrs attrs, MemTxResult *result)
3590 address_space_stw_internal(as, addr, val, attrs, result,
3591 DEVICE_LITTLE_ENDIAN);
3594 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3595 MemTxAttrs attrs, MemTxResult *result)
3597 address_space_stw_internal(as, addr, val, attrs, result,
3598 DEVICE_BIG_ENDIAN);
3601 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3603 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3606 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3608 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3611 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3613 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3616 /* XXX: optimize */
3617 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3618 MemTxAttrs attrs, MemTxResult *result)
3620 MemTxResult r;
3621 val = tswap64(val);
3622 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3623 if (result) {
3624 *result = r;
3628 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3629 MemTxAttrs attrs, MemTxResult *result)
3631 MemTxResult r;
3632 val = cpu_to_le64(val);
3633 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3634 if (result) {
3635 *result = r;
3638 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3639 MemTxAttrs attrs, MemTxResult *result)
3641 MemTxResult r;
3642 val = cpu_to_be64(val);
3643 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3644 if (result) {
3645 *result = r;
3649 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3651 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3654 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3656 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3659 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3661 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3664 /* virtual memory access for debug (includes writing to ROM) */
3665 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3666 uint8_t *buf, int len, int is_write)
3668 int l;
3669 hwaddr phys_addr;
3670 target_ulong page;
3672 while (len > 0) {
3673 int asidx;
3674 MemTxAttrs attrs;
3676 page = addr & TARGET_PAGE_MASK;
3677 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3678 asidx = cpu_asidx_from_attrs(cpu, attrs);
3679 /* if no physical page mapped, return an error */
3680 if (phys_addr == -1)
3681 return -1;
3682 l = (page + TARGET_PAGE_SIZE) - addr;
3683 if (l > len)
3684 l = len;
3685 phys_addr += (addr & ~TARGET_PAGE_MASK);
3686 if (is_write) {
3687 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3688 phys_addr, buf, l);
3689 } else {
3690 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3691 MEMTXATTRS_UNSPECIFIED,
3692 buf, l, 0);
3694 len -= l;
3695 buf += l;
3696 addr += l;
3698 return 0;
3702 * Allows code that needs to deal with migration bitmaps etc to still be built
3703 * target independent.
3705 size_t qemu_target_page_bits(void)
3707 return TARGET_PAGE_BITS;
3710 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time, false
 * otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3726 #ifndef CONFIG_USER_ONLY
3727 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3729 MemoryRegion*mr;
3730 hwaddr l = 1;
3731 bool res;
3733 rcu_read_lock();
3734 mr = address_space_translate(&address_space_memory,
3735 phys_addr, &phys_addr, &l, false);
3737 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3738 rcu_read_unlock();
3739 return res;
3742 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3744 RAMBlock *block;
3745 int ret = 0;
3747 rcu_read_lock();
3748 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3749 ret = func(block->idstr, block->host, block->offset,
3750 block->used_length, opaque);
3751 if (ret) {
3752 break;
3755 rcu_read_unlock();
3756 return ret;
3758 #endif