block/vpc: give option to force the current_size field in .bdrv_create
[qemu/kevin.git] / exec.c
blobf09dd4e9284a54c83a746351646bb4458c4a4480
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #ifndef _WIN32
21 #include <sys/mman.h>
22 #endif
24 #include "qemu-common.h"
25 #include "cpu.h"
26 #include "tcg.h"
27 #include "hw/hw.h"
28 #if !defined(CONFIG_USER_ONLY)
29 #include "hw/boards.h"
30 #endif
31 #include "hw/qdev.h"
32 #include "sysemu/kvm.h"
33 #include "sysemu/sysemu.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
42 #include <qemu.h>
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
45 #include "trace.h"
46 #endif
47 #include "exec/cpu-all.h"
48 #include "qemu/rcu_queue.h"
49 #include "qemu/main-loop.h"
50 #include "translate-all.h"
51 #include "sysemu/replay.h"
53 #include "exec/memory-internal.h"
54 #include "exec/ram_addr.h"
55 #include "exec/log.h"
57 #include "qemu/range.h"
58 #ifndef _WIN32
59 #include "qemu/mmap-alloc.h"
60 #endif
62 //#define DEBUG_SUBPAGE
64 #if !defined(CONFIG_USER_ONLY)
65 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
66 * are protected by the ramlist lock.
68 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
70 static MemoryRegion *system_memory;
71 static MemoryRegion *system_io;
73 AddressSpace address_space_io;
74 AddressSpace address_space_memory;
76 MemoryRegion io_mem_rom, io_mem_notdirty;
77 static MemoryRegion io_mem_unassigned;
/* The RAM was allocated by the caller and handed to
 * qemu_ram_alloc_from_ptr.
 */
#define RAM_PREALLOC   (1 << 0)

/* The RAM is mmap-ed with MAP_SHARED. */
#define RAM_SHARED     (1 << 1)

/* Only the first used_length bytes of the RAM are in use and migrated;
 * used_length may change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)
90 #endif
92 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
93 /* current CPU in the current thread. It is only valid inside
94 cpu_exec() */
95 __thread CPUState *current_cpu;
96 /* 0 = Do not count executed instructions.
97 1 = Precise instruction counting.
98 2 = Adaptive rate instruction counting. */
99 int use_icount;
101 #if !defined(CONFIG_USER_ONLY)
/* One slot of the multi-level physical page map, packed into 32 bits. */
typedef struct PhysPageEntry {
    /* Number of levels to skip to reach the next node (in units of
     * L2_SIZE); 0 marks a leaf.
     */
    uint32_t skip : 6;
    /* Index into phys_sections when skip == 0, otherwise index into
     * phys_map_nodes.
     */
    uint32_t ptr : 26;
} PhysPageEntry;

/* Value of the 26-bit ptr field (all ones) meaning "no node allocated". */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
114 /* Size of the L2 (and L3, etc) page tables. */
115 #define ADDR_SPACE_BITS 64
117 #define P_L2_BITS 9
118 #define P_L2_SIZE (1 << P_L2_BITS)
120 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
122 typedef PhysPageEntry Node[P_L2_SIZE];
124 typedef struct PhysPageMap {
125 struct rcu_head rcu;
127 unsigned sections_nb;
128 unsigned sections_nb_alloc;
129 unsigned nodes_nb;
130 unsigned nodes_nb_alloc;
131 Node *nodes;
132 MemoryRegionSection *sections;
133 } PhysPageMap;
135 struct AddressSpaceDispatch {
136 struct rcu_head rcu;
138 MemoryRegionSection *mru_section;
139 /* This is a multi-level map on the physical address space.
140 * The bottom level has pointers to MemoryRegionSections.
142 PhysPageEntry phys_map;
143 PhysPageMap map;
144 AddressSpace *as;
147 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
148 typedef struct subpage_t {
149 MemoryRegion iomem;
150 AddressSpace *as;
151 hwaddr base;
152 uint16_t sub_section[TARGET_PAGE_SIZE];
153 } subpage_t;
155 #define PHYS_SECTION_UNASSIGNED 0
156 #define PHYS_SECTION_NOTDIRTY 1
157 #define PHYS_SECTION_ROM 2
158 #define PHYS_SECTION_WATCH 3
160 static void io_mem_init(void);
161 static void memory_map_init(void);
162 static void tcg_commit(MemoryListener *listener);
164 static MemoryRegion io_mem_watch;
167 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
168 * @cpu: the CPU whose AddressSpace this is
169 * @as: the AddressSpace itself
170 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
171 * @tcg_as_listener: listener for tracking changes to the AddressSpace
173 struct CPUAddressSpace {
174 CPUState *cpu;
175 AddressSpace *as;
176 struct AddressSpaceDispatch *memory_dispatch;
177 MemoryListener tcg_as_listener;
180 #endif
182 #if !defined(CONFIG_USER_ONLY)
184 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
186 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
187 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
188 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
189 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
193 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
195 unsigned i;
196 uint32_t ret;
197 PhysPageEntry e;
198 PhysPageEntry *p;
200 ret = map->nodes_nb++;
201 p = map->nodes[ret];
202 assert(ret != PHYS_MAP_NODE_NIL);
203 assert(ret != map->nodes_nb_alloc);
205 e.skip = leaf ? 0 : 1;
206 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
207 for (i = 0; i < P_L2_SIZE; ++i) {
208 memcpy(&p[i], &e, sizeof(e));
210 return ret;
213 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
214 hwaddr *index, hwaddr *nb, uint16_t leaf,
215 int level)
217 PhysPageEntry *p;
218 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
220 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
221 lp->ptr = phys_map_node_alloc(map, level == 0);
223 p = map->nodes[lp->ptr];
224 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
226 while (*nb && lp < &p[P_L2_SIZE]) {
227 if ((*index & (step - 1)) == 0 && *nb >= step) {
228 lp->skip = 0;
229 lp->ptr = leaf;
230 *index += step;
231 *nb -= step;
232 } else {
233 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
235 ++lp;
239 static void phys_page_set(AddressSpaceDispatch *d,
240 hwaddr index, hwaddr nb,
241 uint16_t leaf)
243 /* Wildly overreserve - it doesn't matter much. */
244 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
246 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
249 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
250 * and update our entry so we can skip it and go directly to the destination.
252 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
254 unsigned valid_ptr = P_L2_SIZE;
255 int valid = 0;
256 PhysPageEntry *p;
257 int i;
259 if (lp->ptr == PHYS_MAP_NODE_NIL) {
260 return;
263 p = nodes[lp->ptr];
264 for (i = 0; i < P_L2_SIZE; i++) {
265 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
266 continue;
269 valid_ptr = i;
270 valid++;
271 if (p[i].skip) {
272 phys_page_compact(&p[i], nodes, compacted);
276 /* We can only compress if there's only one child. */
277 if (valid != 1) {
278 return;
281 assert(valid_ptr < P_L2_SIZE);
283 /* Don't compress if it won't fit in the # of bits we have. */
284 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
285 return;
288 lp->ptr = p[valid_ptr].ptr;
289 if (!p[valid_ptr].skip) {
290 /* If our only child is a leaf, make this a leaf. */
291 /* By design, we should have made this node a leaf to begin with so we
292 * should never reach here.
293 * But since it's so simple to handle this, let's do it just in case we
294 * change this rule.
296 lp->skip = 0;
297 } else {
298 lp->skip += p[valid_ptr].skip;
302 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
304 DECLARE_BITMAP(compacted, nodes_nb);
306 if (d->phys_map.skip) {
307 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
311 static inline bool section_covers_addr(const MemoryRegionSection *section,
312 hwaddr addr)
314 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
315 * the section must cover the entire address space.
317 return section->size.hi ||
318 range_covers_byte(section->offset_within_address_space,
319 section->size.lo, addr);
322 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
323 Node *nodes, MemoryRegionSection *sections)
325 PhysPageEntry *p;
326 hwaddr index = addr >> TARGET_PAGE_BITS;
327 int i;
329 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
330 if (lp.ptr == PHYS_MAP_NODE_NIL) {
331 return &sections[PHYS_SECTION_UNASSIGNED];
333 p = nodes[lp.ptr];
334 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
337 if (section_covers_addr(&sections[lp.ptr], addr)) {
338 return &sections[lp.ptr];
339 } else {
340 return &sections[PHYS_SECTION_UNASSIGNED];
344 bool memory_region_is_unassigned(MemoryRegion *mr)
346 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
347 && mr != &io_mem_watch;
350 /* Called from RCU critical section */
351 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
352 hwaddr addr,
353 bool resolve_subpage)
355 MemoryRegionSection *section = atomic_read(&d->mru_section);
356 subpage_t *subpage;
357 bool update;
359 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
360 section_covers_addr(section, addr)) {
361 update = false;
362 } else {
363 section = phys_page_find(d->phys_map, addr, d->map.nodes,
364 d->map.sections);
365 update = true;
367 if (resolve_subpage && section->mr->subpage) {
368 subpage = container_of(section->mr, subpage_t, iomem);
369 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
371 if (update) {
372 atomic_set(&d->mru_section, section);
374 return section;
377 /* Called from RCU critical section */
378 static MemoryRegionSection *
379 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
380 hwaddr *plen, bool resolve_subpage)
382 MemoryRegionSection *section;
383 MemoryRegion *mr;
384 Int128 diff;
386 section = address_space_lookup_region(d, addr, resolve_subpage);
387 /* Compute offset within MemoryRegionSection */
388 addr -= section->offset_within_address_space;
390 /* Compute offset within MemoryRegion */
391 *xlat = addr + section->offset_within_region;
393 mr = section->mr;
395 /* MMIO registers can be expected to perform full-width accesses based only
396 * on their address, without considering adjacent registers that could
397 * decode to completely different MemoryRegions. When such registers
398 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
399 * regions overlap wildly. For this reason we cannot clamp the accesses
400 * here.
402 * If the length is small (as is the case for address_space_ldl/stl),
403 * everything works fine. If the incoming length is large, however,
404 * the caller really has to do the clamping through memory_access_size.
406 if (memory_region_is_ram(mr)) {
407 diff = int128_sub(section->size, int128_make64(addr));
408 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
410 return section;
413 /* Called from RCU critical section */
414 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
415 hwaddr *xlat, hwaddr *plen,
416 bool is_write)
418 IOMMUTLBEntry iotlb;
419 MemoryRegionSection *section;
420 MemoryRegion *mr;
422 for (;;) {
423 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
424 section = address_space_translate_internal(d, addr, &addr, plen, true);
425 mr = section->mr;
427 if (!mr->iommu_ops) {
428 break;
431 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
432 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
433 | (addr & iotlb.addr_mask));
434 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
435 if (!(iotlb.perm & (1 << is_write))) {
436 mr = &io_mem_unassigned;
437 break;
440 as = iotlb.target_as;
443 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
444 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
445 *plen = MIN(page, *plen);
448 *xlat = addr;
449 return mr;
452 /* Called from RCU critical section */
453 MemoryRegionSection *
454 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
455 hwaddr *xlat, hwaddr *plen)
457 MemoryRegionSection *section;
458 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
460 section = address_space_translate_internal(d, addr, xlat, plen, false);
462 assert(!section->mr->iommu_ops);
463 return section;
465 #endif
467 #if !defined(CONFIG_USER_ONLY)
469 static int cpu_common_post_load(void *opaque, int version_id)
471 CPUState *cpu = opaque;
473 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
474 version_id is increased. */
475 cpu->interrupt_request &= ~0x01;
476 tlb_flush(cpu, 1);
478 return 0;
481 static int cpu_common_pre_load(void *opaque)
483 CPUState *cpu = opaque;
485 cpu->exception_index = -1;
487 return 0;
490 static bool cpu_common_exception_index_needed(void *opaque)
492 CPUState *cpu = opaque;
494 return tcg_enabled() && cpu->exception_index != -1;
497 static const VMStateDescription vmstate_cpu_common_exception_index = {
498 .name = "cpu_common/exception_index",
499 .version_id = 1,
500 .minimum_version_id = 1,
501 .needed = cpu_common_exception_index_needed,
502 .fields = (VMStateField[]) {
503 VMSTATE_INT32(exception_index, CPUState),
504 VMSTATE_END_OF_LIST()
508 static bool cpu_common_crash_occurred_needed(void *opaque)
510 CPUState *cpu = opaque;
512 return cpu->crash_occurred;
515 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
516 .name = "cpu_common/crash_occurred",
517 .version_id = 1,
518 .minimum_version_id = 1,
519 .needed = cpu_common_crash_occurred_needed,
520 .fields = (VMStateField[]) {
521 VMSTATE_BOOL(crash_occurred, CPUState),
522 VMSTATE_END_OF_LIST()
526 const VMStateDescription vmstate_cpu_common = {
527 .name = "cpu_common",
528 .version_id = 1,
529 .minimum_version_id = 1,
530 .pre_load = cpu_common_pre_load,
531 .post_load = cpu_common_post_load,
532 .fields = (VMStateField[]) {
533 VMSTATE_UINT32(halted, CPUState),
534 VMSTATE_UINT32(interrupt_request, CPUState),
535 VMSTATE_END_OF_LIST()
537 .subsections = (const VMStateDescription*[]) {
538 &vmstate_cpu_common_exception_index,
539 &vmstate_cpu_common_crash_occurred,
540 NULL
544 #endif
546 CPUState *qemu_get_cpu(int index)
548 CPUState *cpu;
550 CPU_FOREACH(cpu) {
551 if (cpu->cpu_index == index) {
552 return cpu;
556 return NULL;
559 #if !defined(CONFIG_USER_ONLY)
560 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
562 CPUAddressSpace *newas;
564 /* Target code should have set num_ases before calling us */
565 assert(asidx < cpu->num_ases);
567 if (asidx == 0) {
568 /* address space 0 gets the convenience alias */
569 cpu->as = as;
572 /* KVM cannot currently support multiple address spaces. */
573 assert(asidx == 0 || !kvm_enabled());
575 if (!cpu->cpu_ases) {
576 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
579 newas = &cpu->cpu_ases[asidx];
580 newas->cpu = cpu;
581 newas->as = as;
582 if (tcg_enabled()) {
583 newas->tcg_as_listener.commit = tcg_commit;
584 memory_listener_register(&newas->tcg_as_listener, as);
588 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
590 /* Return the AddressSpace corresponding to the specified index */
591 return cpu->cpu_ases[asidx].as;
593 #endif
595 #ifndef CONFIG_USER_ONLY
596 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
598 static int cpu_get_free_index(Error **errp)
600 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
602 if (cpu >= MAX_CPUMASK_BITS) {
603 error_setg(errp, "Trying to use more CPUs than max of %d",
604 MAX_CPUMASK_BITS);
605 return -1;
608 bitmap_set(cpu_index_map, cpu, 1);
609 return cpu;
612 void cpu_exec_exit(CPUState *cpu)
614 if (cpu->cpu_index == -1) {
615 /* cpu_index was never allocated by this @cpu or was already freed. */
616 return;
619 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
620 cpu->cpu_index = -1;
622 #else
624 static int cpu_get_free_index(Error **errp)
626 CPUState *some_cpu;
627 int cpu_index = 0;
629 CPU_FOREACH(some_cpu) {
630 cpu_index++;
632 return cpu_index;
635 void cpu_exec_exit(CPUState *cpu)
638 #endif
640 void cpu_exec_init(CPUState *cpu, Error **errp)
642 CPUClass *cc = CPU_GET_CLASS(cpu);
643 int cpu_index;
644 Error *local_err = NULL;
646 cpu->as = NULL;
647 cpu->num_ases = 0;
649 #ifndef CONFIG_USER_ONLY
650 cpu->thread_id = qemu_get_thread_id();
652 /* This is a softmmu CPU object, so create a property for it
653 * so users can wire up its memory. (This can't go in qom/cpu.c
654 * because that file is compiled only once for both user-mode
655 * and system builds.) The default if no link is set up is to use
656 * the system address space.
658 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
659 (Object **)&cpu->memory,
660 qdev_prop_allow_set_link_before_realize,
661 OBJ_PROP_LINK_UNREF_ON_RELEASE,
662 &error_abort);
663 cpu->memory = system_memory;
664 object_ref(OBJECT(cpu->memory));
665 #endif
667 #if defined(CONFIG_USER_ONLY)
668 cpu_list_lock();
669 #endif
670 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
671 if (local_err) {
672 error_propagate(errp, local_err);
673 #if defined(CONFIG_USER_ONLY)
674 cpu_list_unlock();
675 #endif
676 return;
678 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
679 #if defined(CONFIG_USER_ONLY)
680 cpu_list_unlock();
681 #endif
682 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
683 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
685 if (cc->vmsd != NULL) {
686 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
690 #if defined(CONFIG_USER_ONLY)
691 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
693 tb_invalidate_phys_page_range(pc, pc + 1, 0);
695 #else
696 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
698 MemTxAttrs attrs;
699 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
700 int asidx = cpu_asidx_from_attrs(cpu, attrs);
701 if (phys != -1) {
702 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
703 phys | (pc & ~TARGET_PAGE_MASK));
706 #endif
708 #if defined(CONFIG_USER_ONLY)
709 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
714 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
715 int flags)
717 return -ENOSYS;
720 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
724 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
725 int flags, CPUWatchpoint **watchpoint)
727 return -ENOSYS;
729 #else
730 /* Add a watchpoint. */
731 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
732 int flags, CPUWatchpoint **watchpoint)
734 CPUWatchpoint *wp;
736 /* forbid ranges which are empty or run off the end of the address space */
737 if (len == 0 || (addr + len - 1) < addr) {
738 error_report("tried to set invalid watchpoint at %"
739 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
740 return -EINVAL;
742 wp = g_malloc(sizeof(*wp));
744 wp->vaddr = addr;
745 wp->len = len;
746 wp->flags = flags;
748 /* keep all GDB-injected watchpoints in front */
749 if (flags & BP_GDB) {
750 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
751 } else {
752 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
755 tlb_flush_page(cpu, addr);
757 if (watchpoint)
758 *watchpoint = wp;
759 return 0;
762 /* Remove a specific watchpoint. */
763 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
764 int flags)
766 CPUWatchpoint *wp;
768 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
769 if (addr == wp->vaddr && len == wp->len
770 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
771 cpu_watchpoint_remove_by_ref(cpu, wp);
772 return 0;
775 return -ENOENT;
778 /* Remove a specific watchpoint by reference. */
779 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
781 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
783 tlb_flush_page(cpu, watchpoint->vaddr);
785 g_free(watchpoint);
788 /* Remove all matching watchpoints. */
789 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
791 CPUWatchpoint *wp, *next;
793 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
794 if (wp->flags & mask) {
795 cpu_watchpoint_remove_by_ref(cpu, wp);
800 /* Return true if this watchpoint address matches the specified
801 * access (ie the address range covered by the watchpoint overlaps
802 * partially or completely with the address range covered by the
803 * access).
805 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
806 vaddr addr,
807 vaddr len)
809 /* We know the lengths are non-zero, but a little caution is
810 * required to avoid errors in the case where the range ends
811 * exactly at the top of the address space and so addr + len
812 * wraps round to zero.
814 vaddr wpend = wp->vaddr + wp->len - 1;
815 vaddr addrend = addr + len - 1;
817 return !(addr > wpend || wp->vaddr > addrend);
820 #endif
822 /* Add a breakpoint. */
823 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
824 CPUBreakpoint **breakpoint)
826 CPUBreakpoint *bp;
828 bp = g_malloc(sizeof(*bp));
830 bp->pc = pc;
831 bp->flags = flags;
833 /* keep all GDB-injected breakpoints in front */
834 if (flags & BP_GDB) {
835 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
836 } else {
837 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
840 breakpoint_invalidate(cpu, pc);
842 if (breakpoint) {
843 *breakpoint = bp;
845 return 0;
848 /* Remove a specific breakpoint. */
849 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
851 CPUBreakpoint *bp;
853 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
854 if (bp->pc == pc && bp->flags == flags) {
855 cpu_breakpoint_remove_by_ref(cpu, bp);
856 return 0;
859 return -ENOENT;
862 /* Remove a specific breakpoint by reference. */
863 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
865 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
867 breakpoint_invalidate(cpu, breakpoint->pc);
869 g_free(breakpoint);
872 /* Remove all matching breakpoints. */
873 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
875 CPUBreakpoint *bp, *next;
877 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
878 if (bp->flags & mask) {
879 cpu_breakpoint_remove_by_ref(cpu, bp);
884 /* enable or disable single step mode. EXCP_DEBUG is returned by the
885 CPU loop after each instruction */
886 void cpu_single_step(CPUState *cpu, int enabled)
888 if (cpu->singlestep_enabled != enabled) {
889 cpu->singlestep_enabled = enabled;
890 if (kvm_enabled()) {
891 kvm_update_guest_debug(cpu, 0);
892 } else {
893 /* must flush all the translated code to avoid inconsistencies */
894 /* XXX: only flush what is necessary */
895 tb_flush(cpu);
900 void cpu_abort(CPUState *cpu, const char *fmt, ...)
902 va_list ap;
903 va_list ap2;
905 va_start(ap, fmt);
906 va_copy(ap2, ap);
907 fprintf(stderr, "qemu: fatal: ");
908 vfprintf(stderr, fmt, ap);
909 fprintf(stderr, "\n");
910 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
911 if (qemu_log_separate()) {
912 qemu_log("qemu: fatal: ");
913 qemu_log_vprintf(fmt, ap2);
914 qemu_log("\n");
915 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
916 qemu_log_flush();
917 qemu_log_close();
919 va_end(ap2);
920 va_end(ap);
921 replay_finish();
922 #if defined(CONFIG_USER_ONLY)
924 struct sigaction act;
925 sigfillset(&act.sa_mask);
926 act.sa_handler = SIG_DFL;
927 sigaction(SIGABRT, &act, NULL);
929 #endif
930 abort();
933 #if !defined(CONFIG_USER_ONLY)
934 /* Called from RCU critical section */
935 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
937 RAMBlock *block;
939 block = atomic_rcu_read(&ram_list.mru_block);
940 if (block && addr - block->offset < block->max_length) {
941 return block;
943 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
944 if (addr - block->offset < block->max_length) {
945 goto found;
949 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
950 abort();
952 found:
953 /* It is safe to write mru_block outside the iothread lock. This
954 * is what happens:
956 * mru_block = xxx
957 * rcu_read_unlock()
958 * xxx removed from list
959 * rcu_read_lock()
960 * read mru_block
961 * mru_block = NULL;
962 * call_rcu(reclaim_ramblock, xxx);
963 * rcu_read_unlock()
965 * atomic_rcu_set is not needed here. The block was already published
966 * when it was placed into the list. Here we're just making an extra
967 * copy of the pointer.
969 ram_list.mru_block = block;
970 return block;
973 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
975 CPUState *cpu;
976 ram_addr_t start1;
977 RAMBlock *block;
978 ram_addr_t end;
980 end = TARGET_PAGE_ALIGN(start + length);
981 start &= TARGET_PAGE_MASK;
983 rcu_read_lock();
984 block = qemu_get_ram_block(start);
985 assert(block == qemu_get_ram_block(end - 1));
986 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
987 CPU_FOREACH(cpu) {
988 tlb_reset_dirty(cpu, start1, length);
990 rcu_read_unlock();
993 /* Note: start and end must be within the same ram block. */
994 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
995 ram_addr_t length,
996 unsigned client)
998 DirtyMemoryBlocks *blocks;
999 unsigned long end, page;
1000 bool dirty = false;
1002 if (length == 0) {
1003 return false;
1006 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1007 page = start >> TARGET_PAGE_BITS;
1009 rcu_read_lock();
1011 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1013 while (page < end) {
1014 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1015 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1016 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1018 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1019 offset, num);
1020 page += num;
1023 rcu_read_unlock();
1025 if (dirty && tcg_enabled()) {
1026 tlb_reset_dirty_range_all(start, length);
1029 return dirty;
1032 /* Called from RCU critical section */
1033 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1034 MemoryRegionSection *section,
1035 target_ulong vaddr,
1036 hwaddr paddr, hwaddr xlat,
1037 int prot,
1038 target_ulong *address)
1040 hwaddr iotlb;
1041 CPUWatchpoint *wp;
1043 if (memory_region_is_ram(section->mr)) {
1044 /* Normal RAM. */
1045 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1046 + xlat;
1047 if (!section->readonly) {
1048 iotlb |= PHYS_SECTION_NOTDIRTY;
1049 } else {
1050 iotlb |= PHYS_SECTION_ROM;
1052 } else {
1053 AddressSpaceDispatch *d;
1055 d = atomic_rcu_read(&section->address_space->dispatch);
1056 iotlb = section - d->map.sections;
1057 iotlb += xlat;
1060 /* Make accesses to pages with watchpoints go via the
1061 watchpoint trap routines. */
1062 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1063 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1064 /* Avoid trapping reads of pages with a write breakpoint. */
1065 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1066 iotlb = PHYS_SECTION_WATCH + paddr;
1067 *address |= TLB_MMIO;
1068 break;
1073 return iotlb;
#endif /* !defined(CONFIG_USER_ONLY) */
1077 #if !defined(CONFIG_USER_ONLY)
1079 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1080 uint16_t section);
1081 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1083 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1084 qemu_anon_ram_alloc;
1087 * Set a custom physical guest memory alloator.
1088 * Accelerators with unusual needs may need this. Hopefully, we can
1089 * get rid of it eventually.
1091 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1093 phys_mem_alloc = alloc;
1096 static uint16_t phys_section_add(PhysPageMap *map,
1097 MemoryRegionSection *section)
1099 /* The physical section number is ORed with a page-aligned
1100 * pointer to produce the iotlb entries. Thus it should
1101 * never overflow into the page-aligned value.
1103 assert(map->sections_nb < TARGET_PAGE_SIZE);
1105 if (map->sections_nb == map->sections_nb_alloc) {
1106 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1107 map->sections = g_renew(MemoryRegionSection, map->sections,
1108 map->sections_nb_alloc);
1110 map->sections[map->sections_nb] = *section;
1111 memory_region_ref(section->mr);
1112 return map->sections_nb++;
1115 static void phys_section_destroy(MemoryRegion *mr)
1117 bool have_sub_page = mr->subpage;
1119 memory_region_unref(mr);
1121 if (have_sub_page) {
1122 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1123 object_unref(OBJECT(&subpage->iomem));
1124 g_free(subpage);
1128 static void phys_sections_free(PhysPageMap *map)
1130 while (map->sections_nb > 0) {
1131 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1132 phys_section_destroy(section->mr);
1134 g_free(map->sections);
1135 g_free(map->nodes);
1138 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1140 subpage_t *subpage;
1141 hwaddr base = section->offset_within_address_space
1142 & TARGET_PAGE_MASK;
1143 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1144 d->map.nodes, d->map.sections);
1145 MemoryRegionSection subsection = {
1146 .offset_within_address_space = base,
1147 .size = int128_make64(TARGET_PAGE_SIZE),
1149 hwaddr start, end;
1151 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1153 if (!(existing->mr->subpage)) {
1154 subpage = subpage_init(d->as, base);
1155 subsection.address_space = d->as;
1156 subsection.mr = &subpage->iomem;
1157 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1158 phys_section_add(&d->map, &subsection));
1159 } else {
1160 subpage = container_of(existing->mr, subpage_t, iomem);
1162 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1163 end = start + int128_get64(section->size) - 1;
1164 subpage_register(subpage, start, end,
1165 phys_section_add(&d->map, section));
1169 static void register_multipage(AddressSpaceDispatch *d,
1170 MemoryRegionSection *section)
1172 hwaddr start_addr = section->offset_within_address_space;
1173 uint16_t section_index = phys_section_add(&d->map, section);
1174 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1175 TARGET_PAGE_BITS));
1177 assert(num_pages);
1178 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1181 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1183 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1184 AddressSpaceDispatch *d = as->next_dispatch;
1185 MemoryRegionSection now = *section, remain = *section;
1186 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1188 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1189 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1190 - now.offset_within_address_space;
1192 now.size = int128_min(int128_make64(left), now.size);
1193 register_subpage(d, &now);
1194 } else {
1195 now.size = int128_zero();
1197 while (int128_ne(remain.size, now.size)) {
1198 remain.size = int128_sub(remain.size, now.size);
1199 remain.offset_within_address_space += int128_get64(now.size);
1200 remain.offset_within_region += int128_get64(now.size);
1201 now = remain;
1202 if (int128_lt(remain.size, page_size)) {
1203 register_subpage(d, &now);
1204 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1205 now.size = page_size;
1206 register_subpage(d, &now);
1207 } else {
1208 now.size = int128_and(now.size, int128_neg(page_size));
1209 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; a no-op unless KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1220 void qemu_mutex_lock_ramlist(void)
1222 qemu_mutex_lock(&ram_list.mutex);
1225 void qemu_mutex_unlock_ramlist(void)
1227 qemu_mutex_unlock(&ram_list.mutex);
1230 #ifdef __linux__
1232 #include <sys/vfs.h>
1234 #define HUGETLBFS_MAGIC 0x958458f6
1236 static long gethugepagesize(const char *path, Error **errp)
1238 struct statfs fs;
1239 int ret;
1241 do {
1242 ret = statfs(path, &fs);
1243 } while (ret != 0 && errno == EINTR);
1245 if (ret != 0) {
1246 error_setg_errno(errp, errno, "failed to get page size of file %s",
1247 path);
1248 return 0;
1251 return fs.f_bsize;
1254 static void *file_ram_alloc(RAMBlock *block,
1255 ram_addr_t memory,
1256 const char *path,
1257 Error **errp)
1259 struct stat st;
1260 char *filename;
1261 char *sanitized_name;
1262 char *c;
1263 void *area;
1264 int fd;
1265 uint64_t hpagesize;
1266 Error *local_err = NULL;
1268 hpagesize = gethugepagesize(path, &local_err);
1269 if (local_err) {
1270 error_propagate(errp, local_err);
1271 goto error;
1273 block->mr->align = hpagesize;
1275 if (memory < hpagesize) {
1276 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1277 "or larger than huge page size 0x%" PRIx64,
1278 memory, hpagesize);
1279 goto error;
1282 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1283 error_setg(errp,
1284 "host lacks kvm mmu notifiers, -mem-path unsupported");
1285 goto error;
1288 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1289 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1290 sanitized_name = g_strdup(memory_region_name(block->mr));
1291 for (c = sanitized_name; *c != '\0'; c++) {
1292 if (*c == '/') {
1293 *c = '_';
1297 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1298 sanitized_name);
1299 g_free(sanitized_name);
1301 fd = mkstemp(filename);
1302 if (fd >= 0) {
1303 unlink(filename);
1305 g_free(filename);
1306 } else {
1307 fd = open(path, O_RDWR | O_CREAT, 0644);
1310 if (fd < 0) {
1311 error_setg_errno(errp, errno,
1312 "unable to create backing store for hugepages");
1313 goto error;
1316 memory = ROUND_UP(memory, hpagesize);
1319 * ftruncate is not supported by hugetlbfs in older
1320 * hosts, so don't bother bailing out on errors.
1321 * If anything goes wrong with it under other filesystems,
1322 * mmap will fail.
1324 if (ftruncate(fd, memory)) {
1325 perror("ftruncate");
1328 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1329 if (area == MAP_FAILED) {
1330 error_setg_errno(errp, errno,
1331 "unable to map backing store for hugepages");
1332 close(fd);
1333 goto error;
1336 if (mem_prealloc) {
1337 os_mem_prealloc(fd, area, memory);
1340 block->fd = fd;
1341 return area;
1343 error:
1344 return NULL;
1346 #endif
1348 /* Called with the ramlist lock held. */
1349 static ram_addr_t find_ram_offset(ram_addr_t size)
1351 RAMBlock *block, *next_block;
1352 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1354 assert(size != 0); /* it would hand out same offset multiple times */
1356 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1357 return 0;
1360 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1361 ram_addr_t end, next = RAM_ADDR_MAX;
1363 end = block->offset + block->max_length;
1365 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1366 if (next_block->offset >= end) {
1367 next = MIN(next, next_block->offset);
1370 if (next - end >= size && next - end < mingap) {
1371 offset = end;
1372 mingap = next - end;
1376 if (offset == RAM_ADDR_MAX) {
1377 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1378 (uint64_t)size);
1379 abort();
1382 return offset;
1385 ram_addr_t last_ram_offset(void)
1387 RAMBlock *block;
1388 ram_addr_t last = 0;
1390 rcu_read_lock();
1391 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1392 last = MAX(last, block->offset + block->max_length);
1394 rcu_read_unlock();
1395 return last;
1398 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1400 int ret;
1402 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1403 if (!machine_dump_guest_core(current_machine)) {
1404 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1405 if (ret) {
1406 perror("qemu_madvise");
1407 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1408 "but dump_guest_core=off specified\n");
1413 /* Called within an RCU critical section, or while the ramlist lock
1414 * is held.
1416 static RAMBlock *find_ram_block(ram_addr_t addr)
1418 RAMBlock *block;
1420 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1421 if (block->offset == addr) {
1422 return block;
1426 return NULL;
1429 const char *qemu_ram_get_idstr(RAMBlock *rb)
1431 return rb->idstr;
1434 /* Called with iothread lock held. */
1435 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1437 RAMBlock *new_block, *block;
1439 rcu_read_lock();
1440 new_block = find_ram_block(addr);
1441 assert(new_block);
1442 assert(!new_block->idstr[0]);
1444 if (dev) {
1445 char *id = qdev_get_dev_path(dev);
1446 if (id) {
1447 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1448 g_free(id);
1451 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1453 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1454 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1455 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1456 new_block->idstr);
1457 abort();
1460 rcu_read_unlock();
1463 /* Called with iothread lock held. */
1464 void qemu_ram_unset_idstr(ram_addr_t addr)
1466 RAMBlock *block;
1468 /* FIXME: arch_init.c assumes that this is not called throughout
1469 * migration. Ignore the problem since hot-unplug during migration
1470 * does not work anyway.
1473 rcu_read_lock();
1474 block = find_ram_block(addr);
1475 if (block) {
1476 memset(block->idstr, 0, sizeof(block->idstr));
1478 rcu_read_unlock();
1481 static int memory_try_enable_merging(void *addr, size_t len)
1483 if (!machine_mem_merge(current_machine)) {
1484 /* disabled by the user */
1485 return 0;
1488 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1491 /* Only legal before guest might have detected the memory size: e.g. on
1492 * incoming migration, or right after reset.
1494 * As memory core doesn't know how is memory accessed, it is up to
1495 * resize callback to update device state and/or add assertions to detect
1496 * misuse, if necessary.
1498 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1500 RAMBlock *block = find_ram_block(base);
1502 assert(block);
1504 newsize = HOST_PAGE_ALIGN(newsize);
1506 if (block->used_length == newsize) {
1507 return 0;
1510 if (!(block->flags & RAM_RESIZEABLE)) {
1511 error_setg_errno(errp, EINVAL,
1512 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1513 " in != 0x" RAM_ADDR_FMT, block->idstr,
1514 newsize, block->used_length);
1515 return -EINVAL;
1518 if (block->max_length < newsize) {
1519 error_setg_errno(errp, EINVAL,
1520 "Length too large: %s: 0x" RAM_ADDR_FMT
1521 " > 0x" RAM_ADDR_FMT, block->idstr,
1522 newsize, block->max_length);
1523 return -EINVAL;
1526 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1527 block->used_length = newsize;
1528 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1529 DIRTY_CLIENTS_ALL);
1530 memory_region_set_size(block->mr, newsize);
1531 if (block->resized) {
1532 block->resized(block->idstr, newsize, block->host);
1534 return 0;
1537 /* Called with ram_list.mutex held */
1538 static void dirty_memory_extend(ram_addr_t old_ram_size,
1539 ram_addr_t new_ram_size)
1541 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1542 DIRTY_MEMORY_BLOCK_SIZE);
1543 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1544 DIRTY_MEMORY_BLOCK_SIZE);
1545 int i;
1547 /* Only need to extend if block count increased */
1548 if (new_num_blocks <= old_num_blocks) {
1549 return;
1552 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1553 DirtyMemoryBlocks *old_blocks;
1554 DirtyMemoryBlocks *new_blocks;
1555 int j;
1557 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1558 new_blocks = g_malloc(sizeof(*new_blocks) +
1559 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1561 if (old_num_blocks) {
1562 memcpy(new_blocks->blocks, old_blocks->blocks,
1563 old_num_blocks * sizeof(old_blocks->blocks[0]));
1566 for (j = old_num_blocks; j < new_num_blocks; j++) {
1567 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1570 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1572 if (old_blocks) {
1573 g_free_rcu(old_blocks, rcu);
1578 static void ram_block_add(RAMBlock *new_block, Error **errp)
1580 RAMBlock *block;
1581 RAMBlock *last_block = NULL;
1582 ram_addr_t old_ram_size, new_ram_size;
1583 Error *err = NULL;
1585 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1587 qemu_mutex_lock_ramlist();
1588 new_block->offset = find_ram_offset(new_block->max_length);
1590 if (!new_block->host) {
1591 if (xen_enabled()) {
1592 xen_ram_alloc(new_block->offset, new_block->max_length,
1593 new_block->mr, &err);
1594 if (err) {
1595 error_propagate(errp, err);
1596 qemu_mutex_unlock_ramlist();
1598 } else {
1599 new_block->host = phys_mem_alloc(new_block->max_length,
1600 &new_block->mr->align);
1601 if (!new_block->host) {
1602 error_setg_errno(errp, errno,
1603 "cannot set up guest memory '%s'",
1604 memory_region_name(new_block->mr));
1605 qemu_mutex_unlock_ramlist();
1607 memory_try_enable_merging(new_block->host, new_block->max_length);
1611 new_ram_size = MAX(old_ram_size,
1612 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1613 if (new_ram_size > old_ram_size) {
1614 migration_bitmap_extend(old_ram_size, new_ram_size);
1615 dirty_memory_extend(old_ram_size, new_ram_size);
1617 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1618 * QLIST (which has an RCU-friendly variant) does not have insertion at
1619 * tail, so save the last element in last_block.
1621 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1622 last_block = block;
1623 if (block->max_length < new_block->max_length) {
1624 break;
1627 if (block) {
1628 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1629 } else if (last_block) {
1630 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1631 } else { /* list is empty */
1632 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1634 ram_list.mru_block = NULL;
1636 /* Write list before version */
1637 smp_wmb();
1638 ram_list.version++;
1639 qemu_mutex_unlock_ramlist();
1641 cpu_physical_memory_set_dirty_range(new_block->offset,
1642 new_block->used_length,
1643 DIRTY_CLIENTS_ALL);
1645 if (new_block->host) {
1646 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1647 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1648 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1649 if (kvm_enabled()) {
1650 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1655 #ifdef __linux__
1656 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1657 bool share, const char *mem_path,
1658 Error **errp)
1660 RAMBlock *new_block;
1661 Error *local_err = NULL;
1663 if (xen_enabled()) {
1664 error_setg(errp, "-mem-path not supported with Xen");
1665 return NULL;
1668 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1670 * file_ram_alloc() needs to allocate just like
1671 * phys_mem_alloc, but we haven't bothered to provide
1672 * a hook there.
1674 error_setg(errp,
1675 "-mem-path not supported with this accelerator");
1676 return NULL;
1679 size = HOST_PAGE_ALIGN(size);
1680 new_block = g_malloc0(sizeof(*new_block));
1681 new_block->mr = mr;
1682 new_block->used_length = size;
1683 new_block->max_length = size;
1684 new_block->flags = share ? RAM_SHARED : 0;
1685 new_block->host = file_ram_alloc(new_block, size,
1686 mem_path, errp);
1687 if (!new_block->host) {
1688 g_free(new_block);
1689 return NULL;
1692 ram_block_add(new_block, &local_err);
1693 if (local_err) {
1694 g_free(new_block);
1695 error_propagate(errp, local_err);
1696 return NULL;
1698 return new_block;
1700 #endif
1702 static
1703 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1704 void (*resized)(const char*,
1705 uint64_t length,
1706 void *host),
1707 void *host, bool resizeable,
1708 MemoryRegion *mr, Error **errp)
1710 RAMBlock *new_block;
1711 Error *local_err = NULL;
1713 size = HOST_PAGE_ALIGN(size);
1714 max_size = HOST_PAGE_ALIGN(max_size);
1715 new_block = g_malloc0(sizeof(*new_block));
1716 new_block->mr = mr;
1717 new_block->resized = resized;
1718 new_block->used_length = size;
1719 new_block->max_length = max_size;
1720 assert(max_size >= size);
1721 new_block->fd = -1;
1722 new_block->host = host;
1723 if (host) {
1724 new_block->flags |= RAM_PREALLOC;
1726 if (resizeable) {
1727 new_block->flags |= RAM_RESIZEABLE;
1729 ram_block_add(new_block, &local_err);
1730 if (local_err) {
1731 g_free(new_block);
1732 error_propagate(errp, local_err);
1733 return NULL;
1735 return new_block;
1738 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1739 MemoryRegion *mr, Error **errp)
1741 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1744 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1746 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1749 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1750 void (*resized)(const char*,
1751 uint64_t length,
1752 void *host),
1753 MemoryRegion *mr, Error **errp)
1755 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1758 static void reclaim_ramblock(RAMBlock *block)
1760 if (block->flags & RAM_PREALLOC) {
1762 } else if (xen_enabled()) {
1763 xen_invalidate_map_cache_entry(block->host);
1764 #ifndef _WIN32
1765 } else if (block->fd >= 0) {
1766 qemu_ram_munmap(block->host, block->max_length);
1767 close(block->fd);
1768 #endif
1769 } else {
1770 qemu_anon_ram_free(block->host, block->max_length);
1772 g_free(block);
1775 void qemu_ram_free(RAMBlock *block)
1777 qemu_mutex_lock_ramlist();
1778 QLIST_REMOVE_RCU(block, next);
1779 ram_list.mru_block = NULL;
1780 /* Write list before version */
1781 smp_wmb();
1782 ram_list.version++;
1783 call_rcu(block, reclaim_ramblock, rcu);
1784 qemu_mutex_unlock_ramlist();
1787 #ifndef _WIN32
1788 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1790 RAMBlock *block;
1791 ram_addr_t offset;
1792 int flags;
1793 void *area, *vaddr;
1795 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1796 offset = addr - block->offset;
1797 if (offset < block->max_length) {
1798 vaddr = ramblock_ptr(block, offset);
1799 if (block->flags & RAM_PREALLOC) {
1801 } else if (xen_enabled()) {
1802 abort();
1803 } else {
1804 flags = MAP_FIXED;
1805 if (block->fd >= 0) {
1806 flags |= (block->flags & RAM_SHARED ?
1807 MAP_SHARED : MAP_PRIVATE);
1808 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1809 flags, block->fd, offset);
1810 } else {
1812 * Remap needs to match alloc. Accelerators that
1813 * set phys_mem_alloc never remap. If they did,
1814 * we'd need a remap hook here.
1816 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1818 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1819 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1820 flags, -1, 0);
1822 if (area != vaddr) {
1823 fprintf(stderr, "Could not remap addr: "
1824 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1825 length, addr);
1826 exit(1);
1828 memory_try_enable_merging(vaddr, length);
1829 qemu_ram_setup_dump(vaddr, length);
1834 #endif /* !_WIN32 */
1836 int qemu_get_ram_fd(ram_addr_t addr)
1838 RAMBlock *block;
1839 int fd;
1841 rcu_read_lock();
1842 block = qemu_get_ram_block(addr);
1843 fd = block->fd;
1844 rcu_read_unlock();
1845 return fd;
1848 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1850 RAMBlock *block;
1852 rcu_read_lock();
1853 block = qemu_get_ram_block(addr);
1854 block->fd = fd;
1855 rcu_read_unlock();
1858 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1860 RAMBlock *block;
1861 void *ptr;
1863 rcu_read_lock();
1864 block = qemu_get_ram_block(addr);
1865 ptr = ramblock_ptr(block, 0);
1866 rcu_read_unlock();
1867 return ptr;
1870 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1871 * This should not be used for general purpose DMA. Use address_space_map
1872 * or address_space_rw instead. For local memory (e.g. video ram) that the
1873 * device owns, use memory_region_get_ram_ptr.
1875 * Called within RCU critical section.
1877 void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1879 RAMBlock *block = ram_block;
1881 if (block == NULL) {
1882 block = qemu_get_ram_block(addr);
1885 if (xen_enabled() && block->host == NULL) {
1886 /* We need to check if the requested address is in the RAM
1887 * because we don't want to map the entire memory in QEMU.
1888 * In that case just map until the end of the page.
1890 if (block->offset == 0) {
1891 return xen_map_cache(addr, 0, 0);
1894 block->host = xen_map_cache(block->offset, block->max_length, 1);
1896 return ramblock_ptr(block, addr - block->offset);
1899 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1900 * but takes a size argument.
1902 * Called within RCU critical section.
1904 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1905 hwaddr *size)
1907 RAMBlock *block = ram_block;
1908 ram_addr_t offset_inside_block;
1909 if (*size == 0) {
1910 return NULL;
1913 if (block == NULL) {
1914 block = qemu_get_ram_block(addr);
1916 offset_inside_block = addr - block->offset;
1917 *size = MIN(*size, block->max_length - offset_inside_block);
1919 if (xen_enabled() && block->host == NULL) {
1920 /* We need to check if the requested address is in the RAM
1921 * because we don't want to map the entire memory in QEMU.
1922 * In that case just map the requested area.
1924 if (block->offset == 0) {
1925 return xen_map_cache(addr, *size, 1);
1928 block->host = xen_map_cache(block->offset, block->max_length, 1);
1931 return ramblock_ptr(block, offset_inside_block);
1935 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1936 * in that RAMBlock.
1938 * ptr: Host pointer to look up
1939 * round_offset: If true round the result offset down to a page boundary
1940 * *ram_addr: set to result ram_addr
1941 * *offset: set to result offset within the RAMBlock
1943 * Returns: RAMBlock (or NULL if not found)
1945 * By the time this function returns, the returned pointer is not protected
1946 * by RCU anymore. If the caller is not within an RCU critical section and
1947 * does not hold the iothread lock, it must have other means of protecting the
1948 * pointer, such as a reference to the region that includes the incoming
1949 * ram_addr_t.
1951 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1952 ram_addr_t *ram_addr,
1953 ram_addr_t *offset)
1955 RAMBlock *block;
1956 uint8_t *host = ptr;
1958 if (xen_enabled()) {
1959 rcu_read_lock();
1960 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1961 block = qemu_get_ram_block(*ram_addr);
1962 if (block) {
1963 *offset = (host - block->host);
1965 rcu_read_unlock();
1966 return block;
1969 rcu_read_lock();
1970 block = atomic_rcu_read(&ram_list.mru_block);
1971 if (block && block->host && host - block->host < block->max_length) {
1972 goto found;
1975 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1976 /* This case append when the block is not mapped. */
1977 if (block->host == NULL) {
1978 continue;
1980 if (host - block->host < block->max_length) {
1981 goto found;
1985 rcu_read_unlock();
1986 return NULL;
1988 found:
1989 *offset = (host - block->host);
1990 if (round_offset) {
1991 *offset &= TARGET_PAGE_MASK;
1993 *ram_addr = block->offset + *offset;
1994 rcu_read_unlock();
1995 return block;
1999 * Finds the named RAMBlock
2001 * name: The name of RAMBlock to find
2003 * Returns: RAMBlock (or NULL if not found)
2005 RAMBlock *qemu_ram_block_by_name(const char *name)
2007 RAMBlock *block;
2009 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2010 if (!strcmp(name, block->idstr)) {
2011 return block;
2015 return NULL;
2018 /* Some of the softmmu routines need to translate from a host pointer
2019 (typically a TLB entry) back to a ram offset. */
2020 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2022 RAMBlock *block;
2023 ram_addr_t offset; /* Not used */
2025 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2027 if (!block) {
2028 return NULL;
2031 return block->mr;
2034 /* Called within RCU critical section. */
2035 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2036 uint64_t val, unsigned size)
2038 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2039 tb_invalidate_phys_page_fast(ram_addr, size);
2041 switch (size) {
2042 case 1:
2043 stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2044 break;
2045 case 2:
2046 stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2047 break;
2048 case 4:
2049 stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2050 break;
2051 default:
2052 abort();
2054 /* Set both VGA and migration bits for simplicity and to remove
2055 * the notdirty callback faster.
2057 cpu_physical_memory_set_dirty_range(ram_addr, size,
2058 DIRTY_CLIENTS_NOCODE);
2059 /* we remove the notdirty callback only if the code has been
2060 flushed */
2061 if (!cpu_physical_memory_is_clean(ram_addr)) {
2062 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2066 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2067 unsigned size, bool is_write)
2069 return is_write;
2072 static const MemoryRegionOps notdirty_mem_ops = {
2073 .write = notdirty_mem_write,
2074 .valid.accepts = notdirty_mem_accepts,
2075 .endianness = DEVICE_NATIVE_ENDIAN,
2078 /* Generate a debug exception if a watchpoint has been hit. */
2079 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2081 CPUState *cpu = current_cpu;
2082 CPUClass *cc = CPU_GET_CLASS(cpu);
2083 CPUArchState *env = cpu->env_ptr;
2084 target_ulong pc, cs_base;
2085 target_ulong vaddr;
2086 CPUWatchpoint *wp;
2087 int cpu_flags;
2089 if (cpu->watchpoint_hit) {
2090 /* We re-entered the check after replacing the TB. Now raise
2091 * the debug interrupt so that is will trigger after the
2092 * current instruction. */
2093 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2094 return;
2096 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2097 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2098 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2099 && (wp->flags & flags)) {
2100 if (flags == BP_MEM_READ) {
2101 wp->flags |= BP_WATCHPOINT_HIT_READ;
2102 } else {
2103 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2105 wp->hitaddr = vaddr;
2106 wp->hitattrs = attrs;
2107 if (!cpu->watchpoint_hit) {
2108 if (wp->flags & BP_CPU &&
2109 !cc->debug_check_watchpoint(cpu, wp)) {
2110 wp->flags &= ~BP_WATCHPOINT_HIT;
2111 continue;
2113 cpu->watchpoint_hit = wp;
2114 tb_check_watchpoint(cpu);
2115 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2116 cpu->exception_index = EXCP_DEBUG;
2117 cpu_loop_exit(cpu);
2118 } else {
2119 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2120 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2121 cpu_resume_from_signal(cpu, NULL);
2124 } else {
2125 wp->flags &= ~BP_WATCHPOINT_HIT;
2130 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2131 so these check for a hit then pass through to the normal out-of-line
2132 phys routines. */
2133 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2134 unsigned size, MemTxAttrs attrs)
2136 MemTxResult res;
2137 uint64_t data;
2138 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2139 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2141 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2142 switch (size) {
2143 case 1:
2144 data = address_space_ldub(as, addr, attrs, &res);
2145 break;
2146 case 2:
2147 data = address_space_lduw(as, addr, attrs, &res);
2148 break;
2149 case 4:
2150 data = address_space_ldl(as, addr, attrs, &res);
2151 break;
2152 default: abort();
2154 *pdata = data;
2155 return res;
2158 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2159 uint64_t val, unsigned size,
2160 MemTxAttrs attrs)
2162 MemTxResult res;
2163 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2164 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2166 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2167 switch (size) {
2168 case 1:
2169 address_space_stb(as, addr, val, attrs, &res);
2170 break;
2171 case 2:
2172 address_space_stw(as, addr, val, attrs, &res);
2173 break;
2174 case 4:
2175 address_space_stl(as, addr, val, attrs, &res);
2176 break;
2177 default: abort();
2179 return res;
2182 static const MemoryRegionOps watch_mem_ops = {
2183 .read_with_attrs = watch_mem_read,
2184 .write_with_attrs = watch_mem_write,
2185 .endianness = DEVICE_NATIVE_ENDIAN,
2188 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2189 unsigned len, MemTxAttrs attrs)
2191 subpage_t *subpage = opaque;
2192 uint8_t buf[8];
2193 MemTxResult res;
2195 #if defined(DEBUG_SUBPAGE)
2196 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2197 subpage, len, addr);
2198 #endif
2199 res = address_space_read(subpage->as, addr + subpage->base,
2200 attrs, buf, len);
2201 if (res) {
2202 return res;
2204 switch (len) {
2205 case 1:
2206 *data = ldub_p(buf);
2207 return MEMTX_OK;
2208 case 2:
2209 *data = lduw_p(buf);
2210 return MEMTX_OK;
2211 case 4:
2212 *data = ldl_p(buf);
2213 return MEMTX_OK;
2214 case 8:
2215 *data = ldq_p(buf);
2216 return MEMTX_OK;
2217 default:
2218 abort();
2222 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2223 uint64_t value, unsigned len, MemTxAttrs attrs)
2225 subpage_t *subpage = opaque;
2226 uint8_t buf[8];
2228 #if defined(DEBUG_SUBPAGE)
2229 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2230 " value %"PRIx64"\n",
2231 __func__, subpage, len, addr, value);
2232 #endif
2233 switch (len) {
2234 case 1:
2235 stb_p(buf, value);
2236 break;
2237 case 2:
2238 stw_p(buf, value);
2239 break;
2240 case 4:
2241 stl_p(buf, value);
2242 break;
2243 case 8:
2244 stq_p(buf, value);
2245 break;
2246 default:
2247 abort();
2249 return address_space_write(subpage->as, addr + subpage->base,
2250 attrs, buf, len);
2253 static bool subpage_accepts(void *opaque, hwaddr addr,
2254 unsigned len, bool is_write)
2256 subpage_t *subpage = opaque;
2257 #if defined(DEBUG_SUBPAGE)
2258 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2259 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2260 #endif
2262 return address_space_access_valid(subpage->as, addr + subpage->base,
2263 len, is_write);
2266 static const MemoryRegionOps subpage_ops = {
2267 .read_with_attrs = subpage_read,
2268 .write_with_attrs = subpage_write,
2269 .impl.min_access_size = 1,
2270 .impl.max_access_size = 8,
2271 .valid.min_access_size = 1,
2272 .valid.max_access_size = 8,
2273 .valid.accepts = subpage_accepts,
2274 .endianness = DEVICE_NATIVE_ENDIAN,
2277 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2278 uint16_t section)
2280 int idx, eidx;
2282 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2283 return -1;
2284 idx = SUBPAGE_IDX(start);
2285 eidx = SUBPAGE_IDX(end);
2286 #if defined(DEBUG_SUBPAGE)
2287 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2288 __func__, mmio, start, end, idx, eidx, section);
2289 #endif
2290 for (; idx <= eidx; idx++) {
2291 mmio->sub_section[idx] = section;
2294 return 0;
2297 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2299 subpage_t *mmio;
2301 mmio = g_malloc0(sizeof(subpage_t));
2303 mmio->as = as;
2304 mmio->base = base;
2305 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2306 NULL, TARGET_PAGE_SIZE);
2307 mmio->iomem.subpage = true;
2308 #if defined(DEBUG_SUBPAGE)
2309 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2310 mmio, base, TARGET_PAGE_SIZE);
2311 #endif
2312 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2314 return mmio;
2317 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2318 MemoryRegion *mr)
2320 assert(as);
2321 MemoryRegionSection section = {
2322 .address_space = as,
2323 .mr = mr,
2324 .offset_within_address_space = 0,
2325 .offset_within_region = 0,
2326 .size = int128_2_64(),
2329 return phys_section_add(map, &section);
2332 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2334 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2335 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2336 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2337 MemoryRegionSection *sections = d->map.sections;
2339 return sections[index & ~TARGET_PAGE_MASK].mr;
2342 static void io_mem_init(void)
2344 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2345 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2346 NULL, UINT64_MAX);
2347 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2348 NULL, UINT64_MAX);
2349 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2350 NULL, UINT64_MAX);
2353 static void mem_begin(MemoryListener *listener)
2355 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2356 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2357 uint16_t n;
2359 n = dummy_section(&d->map, as, &io_mem_unassigned);
2360 assert(n == PHYS_SECTION_UNASSIGNED);
2361 n = dummy_section(&d->map, as, &io_mem_notdirty);
2362 assert(n == PHYS_SECTION_NOTDIRTY);
2363 n = dummy_section(&d->map, as, &io_mem_rom);
2364 assert(n == PHYS_SECTION_ROM);
2365 n = dummy_section(&d->map, as, &io_mem_watch);
2366 assert(n == PHYS_SECTION_WATCH);
2368 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2369 d->as = as;
2370 as->next_dispatch = d;
2373 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2375 phys_sections_free(&d->map);
2376 g_free(d);
2379 static void mem_commit(MemoryListener *listener)
2381 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2382 AddressSpaceDispatch *cur = as->dispatch;
2383 AddressSpaceDispatch *next = as->next_dispatch;
2385 phys_page_compact_all(next, next->map.nodes_nb);
2387 atomic_rcu_set(&as->dispatch, next);
2388 if (cur) {
2389 call_rcu(cur, address_space_dispatch_free, rcu);
2393 static void tcg_commit(MemoryListener *listener)
2395 CPUAddressSpace *cpuas;
2396 AddressSpaceDispatch *d;
2398 /* since each CPU stores ram addresses in its TLB cache, we must
2399 reset the modified entries */
2400 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2401 cpu_reloading_memory_map();
2402 /* The CPU and TLB are protected by the iothread lock.
2403 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2404 * may have split the RCU critical section.
2406 d = atomic_rcu_read(&cpuas->as->dispatch);
2407 cpuas->memory_dispatch = d;
2408 tlb_flush(cpuas->cpu, 1);
2411 void address_space_init_dispatch(AddressSpace *as)
2413 as->dispatch = NULL;
2414 as->dispatch_listener = (MemoryListener) {
2415 .begin = mem_begin,
2416 .commit = mem_commit,
2417 .region_add = mem_add,
2418 .region_nop = mem_add,
2419 .priority = 0,
2421 memory_listener_register(&as->dispatch_listener, as);
2424 void address_space_unregister(AddressSpace *as)
2426 memory_listener_unregister(&as->dispatch_listener);
2429 void address_space_destroy_dispatch(AddressSpace *as)
2431 AddressSpaceDispatch *d = as->dispatch;
2433 atomic_rcu_set(&as->dispatch, NULL);
2434 if (d) {
2435 call_rcu(d, address_space_dispatch_free, rcu);
2439 static void memory_map_init(void)
2441 system_memory = g_malloc(sizeof(*system_memory));
2443 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2444 address_space_init(&address_space_memory, system_memory, "memory");
2446 system_io = g_malloc(sizeof(*system_io));
2447 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2448 65536);
2449 address_space_init(&address_space_io, system_io, "I/O");
2452 MemoryRegion *get_system_memory(void)
2454 return system_memory;
2457 MemoryRegion *get_system_io(void)
2459 return system_io;
2462 #endif /* !defined(CONFIG_USER_ONLY) */
2464 /* physical memory access (slow version, mainly for debug) */
2465 #if defined(CONFIG_USER_ONLY)
2466 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2467 uint8_t *buf, int len, int is_write)
2469 int l, flags;
2470 target_ulong page;
2471 void * p;
2473 while (len > 0) {
2474 page = addr & TARGET_PAGE_MASK;
2475 l = (page + TARGET_PAGE_SIZE) - addr;
2476 if (l > len)
2477 l = len;
2478 flags = page_get_flags(page);
2479 if (!(flags & PAGE_VALID))
2480 return -1;
2481 if (is_write) {
2482 if (!(flags & PAGE_WRITE))
2483 return -1;
2484 /* XXX: this code should not depend on lock_user */
2485 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2486 return -1;
2487 memcpy(p, buf, l);
2488 unlock_user(p, addr, l);
2489 } else {
2490 if (!(flags & PAGE_READ))
2491 return -1;
2492 /* XXX: this code should not depend on lock_user */
2493 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2494 return -1;
2495 memcpy(buf, p, l);
2496 unlock_user(p, addr, 0);
2498 len -= l;
2499 buf += l;
2500 addr += l;
2502 return 0;
2505 #else
2507 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2508 hwaddr length)
2510 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2511 /* No early return if dirty_log_mask is or becomes 0, because
2512 * cpu_physical_memory_set_dirty_range will still call
2513 * xen_modified_memory.
2515 if (dirty_log_mask) {
2516 dirty_log_mask =
2517 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2519 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2520 tb_invalidate_phys_range(addr, addr + length);
2521 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2523 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2526 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2528 unsigned access_size_max = mr->ops->valid.max_access_size;
2530 /* Regions are assumed to support 1-4 byte accesses unless
2531 otherwise specified. */
2532 if (access_size_max == 0) {
2533 access_size_max = 4;
2536 /* Bound the maximum access by the alignment of the address. */
2537 if (!mr->ops->impl.unaligned) {
2538 unsigned align_size_max = addr & -addr;
2539 if (align_size_max != 0 && align_size_max < access_size_max) {
2540 access_size_max = align_size_max;
2544 /* Don't attempt accesses larger than the maximum. */
2545 if (l > access_size_max) {
2546 l = access_size_max;
2548 l = pow2floor(l);
2550 return l;
2553 static bool prepare_mmio_access(MemoryRegion *mr)
2555 bool unlocked = !qemu_mutex_iothread_locked();
2556 bool release_lock = false;
2558 if (unlocked && mr->global_locking) {
2559 qemu_mutex_lock_iothread();
2560 unlocked = false;
2561 release_lock = true;
2563 if (mr->flush_coalesced_mmio) {
2564 if (unlocked) {
2565 qemu_mutex_lock_iothread();
2567 qemu_flush_coalesced_mmio_buffer();
2568 if (unlocked) {
2569 qemu_mutex_unlock_iothread();
2573 return release_lock;
2576 /* Called within RCU critical section. */
2577 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2578 MemTxAttrs attrs,
2579 const uint8_t *buf,
2580 int len, hwaddr addr1,
2581 hwaddr l, MemoryRegion *mr)
2583 uint8_t *ptr;
2584 uint64_t val;
2585 MemTxResult result = MEMTX_OK;
2586 bool release_lock = false;
2588 for (;;) {
2589 if (!memory_access_is_direct(mr, true)) {
2590 release_lock |= prepare_mmio_access(mr);
2591 l = memory_access_size(mr, l, addr1);
2592 /* XXX: could force current_cpu to NULL to avoid
2593 potential bugs */
2594 switch (l) {
2595 case 8:
2596 /* 64 bit write access */
2597 val = ldq_p(buf);
2598 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2599 attrs);
2600 break;
2601 case 4:
2602 /* 32 bit write access */
2603 val = ldl_p(buf);
2604 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2605 attrs);
2606 break;
2607 case 2:
2608 /* 16 bit write access */
2609 val = lduw_p(buf);
2610 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2611 attrs);
2612 break;
2613 case 1:
2614 /* 8 bit write access */
2615 val = ldub_p(buf);
2616 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2617 attrs);
2618 break;
2619 default:
2620 abort();
2622 } else {
2623 addr1 += memory_region_get_ram_addr(mr);
2624 /* RAM case */
2625 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2626 memcpy(ptr, buf, l);
2627 invalidate_and_set_dirty(mr, addr1, l);
2630 if (release_lock) {
2631 qemu_mutex_unlock_iothread();
2632 release_lock = false;
2635 len -= l;
2636 buf += l;
2637 addr += l;
2639 if (!len) {
2640 break;
2643 l = len;
2644 mr = address_space_translate(as, addr, &addr1, &l, true);
2647 return result;
2650 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2651 const uint8_t *buf, int len)
2653 hwaddr l;
2654 hwaddr addr1;
2655 MemoryRegion *mr;
2656 MemTxResult result = MEMTX_OK;
2658 if (len > 0) {
2659 rcu_read_lock();
2660 l = len;
2661 mr = address_space_translate(as, addr, &addr1, &l, true);
2662 result = address_space_write_continue(as, addr, attrs, buf, len,
2663 addr1, l, mr);
2664 rcu_read_unlock();
2667 return result;
2670 /* Called within RCU critical section. */
2671 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2672 MemTxAttrs attrs, uint8_t *buf,
2673 int len, hwaddr addr1, hwaddr l,
2674 MemoryRegion *mr)
2676 uint8_t *ptr;
2677 uint64_t val;
2678 MemTxResult result = MEMTX_OK;
2679 bool release_lock = false;
2681 for (;;) {
2682 if (!memory_access_is_direct(mr, false)) {
2683 /* I/O case */
2684 release_lock |= prepare_mmio_access(mr);
2685 l = memory_access_size(mr, l, addr1);
2686 switch (l) {
2687 case 8:
2688 /* 64 bit read access */
2689 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2690 attrs);
2691 stq_p(buf, val);
2692 break;
2693 case 4:
2694 /* 32 bit read access */
2695 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2696 attrs);
2697 stl_p(buf, val);
2698 break;
2699 case 2:
2700 /* 16 bit read access */
2701 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2702 attrs);
2703 stw_p(buf, val);
2704 break;
2705 case 1:
2706 /* 8 bit read access */
2707 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2708 attrs);
2709 stb_p(buf, val);
2710 break;
2711 default:
2712 abort();
2714 } else {
2715 /* RAM case */
2716 ptr = qemu_get_ram_ptr(mr->ram_block,
2717 memory_region_get_ram_addr(mr) + addr1);
2718 memcpy(buf, ptr, l);
2721 if (release_lock) {
2722 qemu_mutex_unlock_iothread();
2723 release_lock = false;
2726 len -= l;
2727 buf += l;
2728 addr += l;
2730 if (!len) {
2731 break;
2734 l = len;
2735 mr = address_space_translate(as, addr, &addr1, &l, false);
2738 return result;
2741 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2742 MemTxAttrs attrs, uint8_t *buf, int len)
2744 hwaddr l;
2745 hwaddr addr1;
2746 MemoryRegion *mr;
2747 MemTxResult result = MEMTX_OK;
2749 if (len > 0) {
2750 rcu_read_lock();
2751 l = len;
2752 mr = address_space_translate(as, addr, &addr1, &l, false);
2753 result = address_space_read_continue(as, addr, attrs, buf, len,
2754 addr1, l, mr);
2755 rcu_read_unlock();
2758 return result;
2761 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2762 uint8_t *buf, int len, bool is_write)
2764 if (is_write) {
2765 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2766 } else {
2767 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2771 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2772 int len, int is_write)
2774 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2775 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into the RAM/ROM backing memory */
    FLUSH_CACHE,    /* flush the host icache over the backing memory */
};
2783 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2784 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2786 hwaddr l;
2787 uint8_t *ptr;
2788 hwaddr addr1;
2789 MemoryRegion *mr;
2791 rcu_read_lock();
2792 while (len > 0) {
2793 l = len;
2794 mr = address_space_translate(as, addr, &addr1, &l, true);
2796 if (!(memory_region_is_ram(mr) ||
2797 memory_region_is_romd(mr))) {
2798 l = memory_access_size(mr, l, addr1);
2799 } else {
2800 addr1 += memory_region_get_ram_addr(mr);
2801 /* ROM/RAM case */
2802 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2803 switch (type) {
2804 case WRITE_DATA:
2805 memcpy(ptr, buf, l);
2806 invalidate_and_set_dirty(mr, addr1, l);
2807 break;
2808 case FLUSH_CACHE:
2809 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2810 break;
2813 len -= l;
2814 buf += l;
2815 addr += l;
2817 rcu_read_unlock();
2820 /* used for ROM loading : can write in RAM and ROM */
2821 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2822 const uint8_t *buf, int len)
2824 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2827 void cpu_flush_icache_range(hwaddr start, int len)
2830 * This function should do the same thing as an icache flush that was
2831 * triggered from within the guest. For TCG we are always cache coherent,
2832 * so there is no need to flush anything. For KVM / Xen we need to flush
2833 * the host's instruction cache at least.
2835 if (tcg_enabled()) {
2836 return;
2839 cpu_physical_memory_write_rom_internal(&address_space_memory,
2840 start, NULL, len, FLUSH_CACHE);
2843 typedef struct {
2844 MemoryRegion *mr;
2845 void *buffer;
2846 hwaddr addr;
2847 hwaddr len;
2848 bool in_use;
2849 } BounceBuffer;
2851 static BounceBuffer bounce;
2853 typedef struct MapClient {
2854 QEMUBH *bh;
2855 QLIST_ENTRY(MapClient) link;
2856 } MapClient;
2858 QemuMutex map_client_list_lock;
2859 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2860 = QLIST_HEAD_INITIALIZER(map_client_list);
2862 static void cpu_unregister_map_client_do(MapClient *client)
2864 QLIST_REMOVE(client, link);
2865 g_free(client);
2868 static void cpu_notify_map_clients_locked(void)
2870 MapClient *client;
2872 while (!QLIST_EMPTY(&map_client_list)) {
2873 client = QLIST_FIRST(&map_client_list);
2874 qemu_bh_schedule(client->bh);
2875 cpu_unregister_map_client_do(client);
2879 void cpu_register_map_client(QEMUBH *bh)
2881 MapClient *client = g_malloc(sizeof(*client));
2883 qemu_mutex_lock(&map_client_list_lock);
2884 client->bh = bh;
2885 QLIST_INSERT_HEAD(&map_client_list, client, link);
2886 if (!atomic_read(&bounce.in_use)) {
2887 cpu_notify_map_clients_locked();
2889 qemu_mutex_unlock(&map_client_list_lock);
2892 void cpu_exec_init_all(void)
2894 qemu_mutex_init(&ram_list.mutex);
2895 io_mem_init();
2896 memory_map_init();
2897 qemu_mutex_init(&map_client_list_lock);
2900 void cpu_unregister_map_client(QEMUBH *bh)
2902 MapClient *client;
2904 qemu_mutex_lock(&map_client_list_lock);
2905 QLIST_FOREACH(client, &map_client_list, link) {
2906 if (client->bh == bh) {
2907 cpu_unregister_map_client_do(client);
2908 break;
2911 qemu_mutex_unlock(&map_client_list_lock);
2914 static void cpu_notify_map_clients(void)
2916 qemu_mutex_lock(&map_client_list_lock);
2917 cpu_notify_map_clients_locked();
2918 qemu_mutex_unlock(&map_client_list_lock);
2921 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2923 MemoryRegion *mr;
2924 hwaddr l, xlat;
2926 rcu_read_lock();
2927 while (len > 0) {
2928 l = len;
2929 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2930 if (!memory_access_is_direct(mr, is_write)) {
2931 l = memory_access_size(mr, l, addr);
2932 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2933 return false;
2937 len -= l;
2938 addr += l;
2940 rcu_read_unlock();
2941 return true;
2944 /* Map a physical memory region into a host virtual address.
2945 * May map a subset of the requested range, given by and returned in *plen.
2946 * May return NULL if resources needed to perform the mapping are exhausted.
2947 * Use only for reads OR writes - not for read-modify-write operations.
2948 * Use cpu_register_map_client() to know when retrying the map operation is
2949 * likely to succeed.
2951 void *address_space_map(AddressSpace *as,
2952 hwaddr addr,
2953 hwaddr *plen,
2954 bool is_write)
2956 hwaddr len = *plen;
2957 hwaddr done = 0;
2958 hwaddr l, xlat, base;
2959 MemoryRegion *mr, *this_mr;
2960 ram_addr_t raddr;
2961 void *ptr;
2963 if (len == 0) {
2964 return NULL;
2967 l = len;
2968 rcu_read_lock();
2969 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2971 if (!memory_access_is_direct(mr, is_write)) {
2972 if (atomic_xchg(&bounce.in_use, true)) {
2973 rcu_read_unlock();
2974 return NULL;
2976 /* Avoid unbounded allocations */
2977 l = MIN(l, TARGET_PAGE_SIZE);
2978 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2979 bounce.addr = addr;
2980 bounce.len = l;
2982 memory_region_ref(mr);
2983 bounce.mr = mr;
2984 if (!is_write) {
2985 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2986 bounce.buffer, l);
2989 rcu_read_unlock();
2990 *plen = l;
2991 return bounce.buffer;
2994 base = xlat;
2995 raddr = memory_region_get_ram_addr(mr);
2997 for (;;) {
2998 len -= l;
2999 addr += l;
3000 done += l;
3001 if (len == 0) {
3002 break;
3005 l = len;
3006 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3007 if (this_mr != mr || xlat != base + done) {
3008 break;
3012 memory_region_ref(mr);
3013 *plen = done;
3014 ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3015 rcu_read_unlock();
3017 return ptr;
3020 /* Unmaps a memory region previously mapped by address_space_map().
3021 * Will also mark the memory as dirty if is_write == 1. access_len gives
3022 * the amount of memory that was actually read or written by the caller.
3024 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3025 int is_write, hwaddr access_len)
3027 if (buffer != bounce.buffer) {
3028 MemoryRegion *mr;
3029 ram_addr_t addr1;
3031 mr = qemu_ram_addr_from_host(buffer, &addr1);
3032 assert(mr != NULL);
3033 if (is_write) {
3034 invalidate_and_set_dirty(mr, addr1, access_len);
3036 if (xen_enabled()) {
3037 xen_invalidate_map_cache_entry(buffer);
3039 memory_region_unref(mr);
3040 return;
3042 if (is_write) {
3043 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3044 bounce.buffer, access_len);
3046 qemu_vfree(bounce.buffer);
3047 bounce.buffer = NULL;
3048 memory_region_unref(bounce.mr);
3049 atomic_mb_set(&bounce.in_use, false);
3050 cpu_notify_map_clients();
3053 void *cpu_physical_memory_map(hwaddr addr,
3054 hwaddr *plen,
3055 int is_write)
3057 return address_space_map(&address_space_memory, addr, plen, is_write);
3060 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3061 int is_write, hwaddr access_len)
3063 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3066 /* warning: addr must be aligned */
3067 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3068 MemTxAttrs attrs,
3069 MemTxResult *result,
3070 enum device_endian endian)
3072 uint8_t *ptr;
3073 uint64_t val;
3074 MemoryRegion *mr;
3075 hwaddr l = 4;
3076 hwaddr addr1;
3077 MemTxResult r;
3078 bool release_lock = false;
3080 rcu_read_lock();
3081 mr = address_space_translate(as, addr, &addr1, &l, false);
3082 if (l < 4 || !memory_access_is_direct(mr, false)) {
3083 release_lock |= prepare_mmio_access(mr);
3085 /* I/O case */
3086 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3087 #if defined(TARGET_WORDS_BIGENDIAN)
3088 if (endian == DEVICE_LITTLE_ENDIAN) {
3089 val = bswap32(val);
3091 #else
3092 if (endian == DEVICE_BIG_ENDIAN) {
3093 val = bswap32(val);
3095 #endif
3096 } else {
3097 /* RAM case */
3098 ptr = qemu_get_ram_ptr(mr->ram_block,
3099 (memory_region_get_ram_addr(mr)
3100 & TARGET_PAGE_MASK)
3101 + addr1);
3102 switch (endian) {
3103 case DEVICE_LITTLE_ENDIAN:
3104 val = ldl_le_p(ptr);
3105 break;
3106 case DEVICE_BIG_ENDIAN:
3107 val = ldl_be_p(ptr);
3108 break;
3109 default:
3110 val = ldl_p(ptr);
3111 break;
3113 r = MEMTX_OK;
3115 if (result) {
3116 *result = r;
3118 if (release_lock) {
3119 qemu_mutex_unlock_iothread();
3121 rcu_read_unlock();
3122 return val;
3125 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3126 MemTxAttrs attrs, MemTxResult *result)
3128 return address_space_ldl_internal(as, addr, attrs, result,
3129 DEVICE_NATIVE_ENDIAN);
3132 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3133 MemTxAttrs attrs, MemTxResult *result)
3135 return address_space_ldl_internal(as, addr, attrs, result,
3136 DEVICE_LITTLE_ENDIAN);
3139 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3140 MemTxAttrs attrs, MemTxResult *result)
3142 return address_space_ldl_internal(as, addr, attrs, result,
3143 DEVICE_BIG_ENDIAN);
3146 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3148 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3151 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3153 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3156 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3158 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3161 /* warning: addr must be aligned */
3162 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3163 MemTxAttrs attrs,
3164 MemTxResult *result,
3165 enum device_endian endian)
3167 uint8_t *ptr;
3168 uint64_t val;
3169 MemoryRegion *mr;
3170 hwaddr l = 8;
3171 hwaddr addr1;
3172 MemTxResult r;
3173 bool release_lock = false;
3175 rcu_read_lock();
3176 mr = address_space_translate(as, addr, &addr1, &l,
3177 false);
3178 if (l < 8 || !memory_access_is_direct(mr, false)) {
3179 release_lock |= prepare_mmio_access(mr);
3181 /* I/O case */
3182 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3183 #if defined(TARGET_WORDS_BIGENDIAN)
3184 if (endian == DEVICE_LITTLE_ENDIAN) {
3185 val = bswap64(val);
3187 #else
3188 if (endian == DEVICE_BIG_ENDIAN) {
3189 val = bswap64(val);
3191 #endif
3192 } else {
3193 /* RAM case */
3194 ptr = qemu_get_ram_ptr(mr->ram_block,
3195 (memory_region_get_ram_addr(mr)
3196 & TARGET_PAGE_MASK)
3197 + addr1);
3198 switch (endian) {
3199 case DEVICE_LITTLE_ENDIAN:
3200 val = ldq_le_p(ptr);
3201 break;
3202 case DEVICE_BIG_ENDIAN:
3203 val = ldq_be_p(ptr);
3204 break;
3205 default:
3206 val = ldq_p(ptr);
3207 break;
3209 r = MEMTX_OK;
3211 if (result) {
3212 *result = r;
3214 if (release_lock) {
3215 qemu_mutex_unlock_iothread();
3217 rcu_read_unlock();
3218 return val;
3221 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3222 MemTxAttrs attrs, MemTxResult *result)
3224 return address_space_ldq_internal(as, addr, attrs, result,
3225 DEVICE_NATIVE_ENDIAN);
3228 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3229 MemTxAttrs attrs, MemTxResult *result)
3231 return address_space_ldq_internal(as, addr, attrs, result,
3232 DEVICE_LITTLE_ENDIAN);
3235 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3236 MemTxAttrs attrs, MemTxResult *result)
3238 return address_space_ldq_internal(as, addr, attrs, result,
3239 DEVICE_BIG_ENDIAN);
3242 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3244 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3247 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3249 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3252 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3254 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3257 /* XXX: optimize */
3258 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3259 MemTxAttrs attrs, MemTxResult *result)
3261 uint8_t val;
3262 MemTxResult r;
3264 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3265 if (result) {
3266 *result = r;
3268 return val;
3271 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3273 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3276 /* warning: addr must be aligned */
3277 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3278 hwaddr addr,
3279 MemTxAttrs attrs,
3280 MemTxResult *result,
3281 enum device_endian endian)
3283 uint8_t *ptr;
3284 uint64_t val;
3285 MemoryRegion *mr;
3286 hwaddr l = 2;
3287 hwaddr addr1;
3288 MemTxResult r;
3289 bool release_lock = false;
3291 rcu_read_lock();
3292 mr = address_space_translate(as, addr, &addr1, &l,
3293 false);
3294 if (l < 2 || !memory_access_is_direct(mr, false)) {
3295 release_lock |= prepare_mmio_access(mr);
3297 /* I/O case */
3298 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3299 #if defined(TARGET_WORDS_BIGENDIAN)
3300 if (endian == DEVICE_LITTLE_ENDIAN) {
3301 val = bswap16(val);
3303 #else
3304 if (endian == DEVICE_BIG_ENDIAN) {
3305 val = bswap16(val);
3307 #endif
3308 } else {
3309 /* RAM case */
3310 ptr = qemu_get_ram_ptr(mr->ram_block,
3311 (memory_region_get_ram_addr(mr)
3312 & TARGET_PAGE_MASK)
3313 + addr1);
3314 switch (endian) {
3315 case DEVICE_LITTLE_ENDIAN:
3316 val = lduw_le_p(ptr);
3317 break;
3318 case DEVICE_BIG_ENDIAN:
3319 val = lduw_be_p(ptr);
3320 break;
3321 default:
3322 val = lduw_p(ptr);
3323 break;
3325 r = MEMTX_OK;
3327 if (result) {
3328 *result = r;
3330 if (release_lock) {
3331 qemu_mutex_unlock_iothread();
3333 rcu_read_unlock();
3334 return val;
3337 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3338 MemTxAttrs attrs, MemTxResult *result)
3340 return address_space_lduw_internal(as, addr, attrs, result,
3341 DEVICE_NATIVE_ENDIAN);
3344 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3345 MemTxAttrs attrs, MemTxResult *result)
3347 return address_space_lduw_internal(as, addr, attrs, result,
3348 DEVICE_LITTLE_ENDIAN);
3351 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3352 MemTxAttrs attrs, MemTxResult *result)
3354 return address_space_lduw_internal(as, addr, attrs, result,
3355 DEVICE_BIG_ENDIAN);
3358 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3360 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3363 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3365 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3368 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3370 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3373 /* warning: addr must be aligned. The ram page is not masked as dirty
3374 and the code inside is not invalidated. It is useful if the dirty
3375 bits are used to track modified PTEs */
3376 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3377 MemTxAttrs attrs, MemTxResult *result)
3379 uint8_t *ptr;
3380 MemoryRegion *mr;
3381 hwaddr l = 4;
3382 hwaddr addr1;
3383 MemTxResult r;
3384 uint8_t dirty_log_mask;
3385 bool release_lock = false;
3387 rcu_read_lock();
3388 mr = address_space_translate(as, addr, &addr1, &l,
3389 true);
3390 if (l < 4 || !memory_access_is_direct(mr, true)) {
3391 release_lock |= prepare_mmio_access(mr);
3393 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3394 } else {
3395 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3396 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3397 stl_p(ptr, val);
3399 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3400 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3401 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3402 r = MEMTX_OK;
3404 if (result) {
3405 *result = r;
3407 if (release_lock) {
3408 qemu_mutex_unlock_iothread();
3410 rcu_read_unlock();
3413 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3415 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3418 /* warning: addr must be aligned */
3419 static inline void address_space_stl_internal(AddressSpace *as,
3420 hwaddr addr, uint32_t val,
3421 MemTxAttrs attrs,
3422 MemTxResult *result,
3423 enum device_endian endian)
3425 uint8_t *ptr;
3426 MemoryRegion *mr;
3427 hwaddr l = 4;
3428 hwaddr addr1;
3429 MemTxResult r;
3430 bool release_lock = false;
3432 rcu_read_lock();
3433 mr = address_space_translate(as, addr, &addr1, &l,
3434 true);
3435 if (l < 4 || !memory_access_is_direct(mr, true)) {
3436 release_lock |= prepare_mmio_access(mr);
3438 #if defined(TARGET_WORDS_BIGENDIAN)
3439 if (endian == DEVICE_LITTLE_ENDIAN) {
3440 val = bswap32(val);
3442 #else
3443 if (endian == DEVICE_BIG_ENDIAN) {
3444 val = bswap32(val);
3446 #endif
3447 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3448 } else {
3449 /* RAM case */
3450 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3451 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3452 switch (endian) {
3453 case DEVICE_LITTLE_ENDIAN:
3454 stl_le_p(ptr, val);
3455 break;
3456 case DEVICE_BIG_ENDIAN:
3457 stl_be_p(ptr, val);
3458 break;
3459 default:
3460 stl_p(ptr, val);
3461 break;
3463 invalidate_and_set_dirty(mr, addr1, 4);
3464 r = MEMTX_OK;
3466 if (result) {
3467 *result = r;
3469 if (release_lock) {
3470 qemu_mutex_unlock_iothread();
3472 rcu_read_unlock();
3475 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3476 MemTxAttrs attrs, MemTxResult *result)
3478 address_space_stl_internal(as, addr, val, attrs, result,
3479 DEVICE_NATIVE_ENDIAN);
3482 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3483 MemTxAttrs attrs, MemTxResult *result)
3485 address_space_stl_internal(as, addr, val, attrs, result,
3486 DEVICE_LITTLE_ENDIAN);
3489 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3490 MemTxAttrs attrs, MemTxResult *result)
3492 address_space_stl_internal(as, addr, val, attrs, result,
3493 DEVICE_BIG_ENDIAN);
3496 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3498 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3501 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3503 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3506 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3508 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3511 /* XXX: optimize */
3512 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3513 MemTxAttrs attrs, MemTxResult *result)
3515 uint8_t v = val;
3516 MemTxResult r;
3518 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3519 if (result) {
3520 *result = r;
3524 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3526 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3529 /* warning: addr must be aligned */
3530 static inline void address_space_stw_internal(AddressSpace *as,
3531 hwaddr addr, uint32_t val,
3532 MemTxAttrs attrs,
3533 MemTxResult *result,
3534 enum device_endian endian)
3536 uint8_t *ptr;
3537 MemoryRegion *mr;
3538 hwaddr l = 2;
3539 hwaddr addr1;
3540 MemTxResult r;
3541 bool release_lock = false;
3543 rcu_read_lock();
3544 mr = address_space_translate(as, addr, &addr1, &l, true);
3545 if (l < 2 || !memory_access_is_direct(mr, true)) {
3546 release_lock |= prepare_mmio_access(mr);
3548 #if defined(TARGET_WORDS_BIGENDIAN)
3549 if (endian == DEVICE_LITTLE_ENDIAN) {
3550 val = bswap16(val);
3552 #else
3553 if (endian == DEVICE_BIG_ENDIAN) {
3554 val = bswap16(val);
3556 #endif
3557 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3558 } else {
3559 /* RAM case */
3560 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3561 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3562 switch (endian) {
3563 case DEVICE_LITTLE_ENDIAN:
3564 stw_le_p(ptr, val);
3565 break;
3566 case DEVICE_BIG_ENDIAN:
3567 stw_be_p(ptr, val);
3568 break;
3569 default:
3570 stw_p(ptr, val);
3571 break;
3573 invalidate_and_set_dirty(mr, addr1, 2);
3574 r = MEMTX_OK;
3576 if (result) {
3577 *result = r;
3579 if (release_lock) {
3580 qemu_mutex_unlock_iothread();
3582 rcu_read_unlock();
3585 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3586 MemTxAttrs attrs, MemTxResult *result)
3588 address_space_stw_internal(as, addr, val, attrs, result,
3589 DEVICE_NATIVE_ENDIAN);
3592 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3593 MemTxAttrs attrs, MemTxResult *result)
3595 address_space_stw_internal(as, addr, val, attrs, result,
3596 DEVICE_LITTLE_ENDIAN);
3599 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3600 MemTxAttrs attrs, MemTxResult *result)
3602 address_space_stw_internal(as, addr, val, attrs, result,
3603 DEVICE_BIG_ENDIAN);
3606 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3608 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3611 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3613 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3616 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3618 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3621 /* XXX: optimize */
3622 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3623 MemTxAttrs attrs, MemTxResult *result)
3625 MemTxResult r;
3626 val = tswap64(val);
3627 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3628 if (result) {
3629 *result = r;
3633 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3634 MemTxAttrs attrs, MemTxResult *result)
3636 MemTxResult r;
3637 val = cpu_to_le64(val);
3638 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3639 if (result) {
3640 *result = r;
3643 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3644 MemTxAttrs attrs, MemTxResult *result)
3646 MemTxResult r;
3647 val = cpu_to_be64(val);
3648 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3649 if (result) {
3650 *result = r;
3654 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3656 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3659 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3661 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3664 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3666 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3669 /* virtual memory access for debug (includes writing to ROM) */
3670 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3671 uint8_t *buf, int len, int is_write)
3673 int l;
3674 hwaddr phys_addr;
3675 target_ulong page;
3677 while (len > 0) {
3678 int asidx;
3679 MemTxAttrs attrs;
3681 page = addr & TARGET_PAGE_MASK;
3682 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3683 asidx = cpu_asidx_from_attrs(cpu, attrs);
3684 /* if no physical page mapped, return an error */
3685 if (phys_addr == -1)
3686 return -1;
3687 l = (page + TARGET_PAGE_SIZE) - addr;
3688 if (l > len)
3689 l = len;
3690 phys_addr += (addr & ~TARGET_PAGE_MASK);
3691 if (is_write) {
3692 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3693 phys_addr, buf, l);
3694 } else {
3695 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3696 MEMTXATTRS_UNSPECIFIED,
3697 buf, l, 0);
3699 len -= l;
3700 buf += l;
3701 addr += l;
3703 return 0;
3707 * Allows code that needs to deal with migration bitmaps etc to still be built
3708 * target independent.
3710 size_t qemu_target_page_bits(void)
3712 return TARGET_PAGE_BITS;
3715 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3731 #ifndef CONFIG_USER_ONLY
3732 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3734 MemoryRegion*mr;
3735 hwaddr l = 1;
3736 bool res;
3738 rcu_read_lock();
3739 mr = address_space_translate(&address_space_memory,
3740 phys_addr, &phys_addr, &l, false);
3742 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3743 rcu_read_unlock();
3744 return res;
3747 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3749 RAMBlock *block;
3750 int ret = 0;
3752 rcu_read_lock();
3753 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3754 ret = func(block->idstr, block->host, block->offset,
3755 block->used_length, opaque);
3756 if (ret) {
3757 break;
3760 rcu_read_unlock();
3761 return ret;
3763 #endif