Plumb the HAXM-based hardware acceleration support
[qemu/ar7.git] / exec.c
blob 34fa7b0708d112bcafdeb71d96b58ea5913ad511
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/hax.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include "qemu.h"
41 #else /* !CONFIG_USER_ONLY */
42 #include "hw/hw.h"
43 #include "exec/memory.h"
44 #include "exec/ioport.h"
45 #include "sysemu/dma.h"
46 #include "exec/address-spaces.h"
47 #include "sysemu/xen-mapcache.h"
48 #include "trace.h"
49 #endif
50 #include "exec/cpu-all.h"
51 #include "qemu/rcu_queue.h"
52 #include "qemu/main-loop.h"
53 #include "translate-all.h"
54 #include "sysemu/replay.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
58 #include "exec/log.h"
60 #include "migration/vmstate.h"
62 #include "qemu/range.h"
63 #ifndef _WIN32
64 #include "qemu/mmap-alloc.h"
65 #endif
67 //#define DEBUG_SUBPAGE
69 #if !defined(CONFIG_USER_ONLY)
70 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
71 * are protected by the ramlist lock.
73 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
75 static MemoryRegion *system_memory;
76 static MemoryRegion *system_io;
78 AddressSpace address_space_io;
79 AddressSpace address_space_memory;
81 MemoryRegion io_mem_rom, io_mem_notdirty;
82 static MemoryRegion io_mem_unassigned;
84 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
85 #define RAM_PREALLOC (1 << 0)
87 /* RAM is mmap-ed with MAP_SHARED */
88 #define RAM_SHARED (1 << 1)
90 /* Only a portion of RAM (used_length) is actually used, and migrated.
91 * This used_length size can change across reboots.
93 #define RAM_RESIZEABLE (1 << 2)
95 #endif
97 #ifdef TARGET_PAGE_BITS_VARY
98 int target_page_bits;
99 bool target_page_bits_decided;
100 #endif
102 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
103 /* current CPU in the current thread. It is only valid inside
104 cpu_exec() */
105 __thread CPUState *current_cpu;
106 /* 0 = Do not count executed instructions.
107 1 = Precise instruction counting.
108 2 = Adaptive rate instruction counting. */
109 int use_icount;
111 bool set_preferred_target_page_bits(int bits)
113 /* The target page size is the lowest common denominator for all
114 * the CPUs in the system, so we can only make it smaller, never
115 * larger. And we can't make it smaller once we've committed to
116 * a particular size.
118 #ifdef TARGET_PAGE_BITS_VARY
119 assert(bits >= TARGET_PAGE_BITS_MIN);
120 if (target_page_bits == 0 || target_page_bits > bits) {
121 if (target_page_bits_decided) {
122 return false;
124 target_page_bits = bits;
126 #endif
127 return true;
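/*
 * Illustrative sketch only (not part of this file): target code that knows
 * its minimum page size might negotiate it while realizing the CPU, e.g.
 * with a hypothetical 4 KiB request:
 *
 *     if (!set_preferred_target_page_bits(12)) {
 *         error_setg(errp, "cannot use 4 KiB pages: target page size "
 *                    "has already been finalized");
 *         return;
 *     }
 */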
130 #if !defined(CONFIG_USER_ONLY)
132 static void finalize_target_page_bits(void)
134 #ifdef TARGET_PAGE_BITS_VARY
135 if (target_page_bits == 0) {
136 target_page_bits = TARGET_PAGE_BITS_MIN;
138 target_page_bits_decided = true;
139 #endif
142 typedef struct PhysPageEntry PhysPageEntry;
144 struct PhysPageEntry {
 145 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
146 uint32_t skip : 6;
147 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
148 uint32_t ptr : 26;
151 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
153 /* Size of the L2 (and L3, etc) page tables. */
154 #define ADDR_SPACE_BITS 64
156 #define P_L2_BITS 9
157 #define P_L2_SIZE (1 << P_L2_BITS)
159 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
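/*
 * Worked example (illustrative): with ADDR_SPACE_BITS = 64, P_L2_BITS = 9 and
 * a typical TARGET_PAGE_BITS of 12, the page index is 64 - 12 = 52 bits wide
 * and P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6, i.e. six radix-tree levels
 * of 512 entries each (6 * 9 = 54 bits >= 52 bits).
 */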
161 typedef PhysPageEntry Node[P_L2_SIZE];
163 typedef struct PhysPageMap {
164 struct rcu_head rcu;
166 unsigned sections_nb;
167 unsigned sections_nb_alloc;
168 unsigned nodes_nb;
169 unsigned nodes_nb_alloc;
170 Node *nodes;
171 MemoryRegionSection *sections;
172 } PhysPageMap;
174 struct AddressSpaceDispatch {
175 struct rcu_head rcu;
177 MemoryRegionSection *mru_section;
178 /* This is a multi-level map on the physical address space.
179 * The bottom level has pointers to MemoryRegionSections.
181 PhysPageEntry phys_map;
182 PhysPageMap map;
183 AddressSpace *as;
186 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
187 typedef struct subpage_t {
188 MemoryRegion iomem;
189 AddressSpace *as;
190 hwaddr base;
191 uint16_t sub_section[];
192 } subpage_t;
194 #define PHYS_SECTION_UNASSIGNED 0
195 #define PHYS_SECTION_NOTDIRTY 1
196 #define PHYS_SECTION_ROM 2
197 #define PHYS_SECTION_WATCH 3
199 static void io_mem_init(void);
200 static void memory_map_init(void);
201 static void tcg_commit(MemoryListener *listener);
203 static MemoryRegion io_mem_watch;
206 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
207 * @cpu: the CPU whose AddressSpace this is
208 * @as: the AddressSpace itself
209 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
210 * @tcg_as_listener: listener for tracking changes to the AddressSpace
212 struct CPUAddressSpace {
213 CPUState *cpu;
214 AddressSpace *as;
215 struct AddressSpaceDispatch *memory_dispatch;
216 MemoryListener tcg_as_listener;
219 #endif
221 #if !defined(CONFIG_USER_ONLY)
223 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
225 static unsigned alloc_hint = 16;
226 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
227 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
228 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
229 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
230 alloc_hint = map->nodes_nb_alloc;
234 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
236 unsigned i;
237 uint32_t ret;
238 PhysPageEntry e;
239 PhysPageEntry *p;
241 ret = map->nodes_nb++;
242 p = map->nodes[ret];
243 assert(ret != PHYS_MAP_NODE_NIL);
244 assert(ret != map->nodes_nb_alloc);
246 e.skip = leaf ? 0 : 1;
247 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
248 for (i = 0; i < P_L2_SIZE; ++i) {
249 memcpy(&p[i], &e, sizeof(e));
251 return ret;
254 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
255 hwaddr *index, hwaddr *nb, uint16_t leaf,
256 int level)
258 PhysPageEntry *p;
259 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
261 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
262 lp->ptr = phys_map_node_alloc(map, level == 0);
264 p = map->nodes[lp->ptr];
265 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
267 while (*nb && lp < &p[P_L2_SIZE]) {
268 if ((*index & (step - 1)) == 0 && *nb >= step) {
269 lp->skip = 0;
270 lp->ptr = leaf;
271 *index += step;
272 *nb -= step;
273 } else {
274 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
276 ++lp;
280 static void phys_page_set(AddressSpaceDispatch *d,
281 hwaddr index, hwaddr nb,
282 uint16_t leaf)
284 /* Wildly overreserve - it doesn't matter much. */
285 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
287 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 290 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
291 * and update our entry so we can skip it and go directly to the destination.
293 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
295 unsigned valid_ptr = P_L2_SIZE;
296 int valid = 0;
297 PhysPageEntry *p;
298 int i;
300 if (lp->ptr == PHYS_MAP_NODE_NIL) {
301 return;
304 p = nodes[lp->ptr];
305 for (i = 0; i < P_L2_SIZE; i++) {
306 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
307 continue;
310 valid_ptr = i;
311 valid++;
312 if (p[i].skip) {
313 phys_page_compact(&p[i], nodes);
317 /* We can only compress if there's only one child. */
318 if (valid != 1) {
319 return;
322 assert(valid_ptr < P_L2_SIZE);
324 /* Don't compress if it won't fit in the # of bits we have. */
325 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
326 return;
329 lp->ptr = p[valid_ptr].ptr;
330 if (!p[valid_ptr].skip) {
331 /* If our only child is a leaf, make this a leaf. */
332 /* By design, we should have made this node a leaf to begin with so we
333 * should never reach here.
334 * But since it's so simple to handle this, let's do it just in case we
335 * change this rule.
337 lp->skip = 0;
338 } else {
339 lp->skip += p[valid_ptr].skip;
343 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
345 if (d->phys_map.skip) {
346 phys_page_compact(&d->phys_map, d->map.nodes);
350 static inline bool section_covers_addr(const MemoryRegionSection *section,
351 hwaddr addr)
353 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
354 * the section must cover the entire address space.
356 return int128_gethi(section->size) ||
357 range_covers_byte(section->offset_within_address_space,
358 int128_getlo(section->size), addr);
361 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
362 Node *nodes, MemoryRegionSection *sections)
364 PhysPageEntry *p;
365 hwaddr index = addr >> TARGET_PAGE_BITS;
366 int i;
368 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
369 if (lp.ptr == PHYS_MAP_NODE_NIL) {
370 return &sections[PHYS_SECTION_UNASSIGNED];
372 p = nodes[lp.ptr];
373 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
376 if (section_covers_addr(&sections[lp.ptr], addr)) {
377 return &sections[lp.ptr];
378 } else {
379 return &sections[PHYS_SECTION_UNASSIGNED];
383 bool memory_region_is_unassigned(MemoryRegion *mr)
385 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
386 && mr != &io_mem_watch;
389 /* Called from RCU critical section */
390 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
391 hwaddr addr,
392 bool resolve_subpage)
394 MemoryRegionSection *section = atomic_read(&d->mru_section);
395 subpage_t *subpage;
396 bool update;
398 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
399 section_covers_addr(section, addr)) {
400 update = false;
401 } else {
402 section = phys_page_find(d->phys_map, addr, d->map.nodes,
403 d->map.sections);
404 update = true;
406 if (resolve_subpage && section->mr->subpage) {
407 subpage = container_of(section->mr, subpage_t, iomem);
408 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
410 if (update) {
411 atomic_set(&d->mru_section, section);
413 return section;
416 /* Called from RCU critical section */
417 static MemoryRegionSection *
418 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
419 hwaddr *plen, bool resolve_subpage)
421 MemoryRegionSection *section;
422 MemoryRegion *mr;
423 Int128 diff;
425 section = address_space_lookup_region(d, addr, resolve_subpage);
426 /* Compute offset within MemoryRegionSection */
427 addr -= section->offset_within_address_space;
429 /* Compute offset within MemoryRegion */
430 *xlat = addr + section->offset_within_region;
432 mr = section->mr;
434 /* MMIO registers can be expected to perform full-width accesses based only
435 * on their address, without considering adjacent registers that could
436 * decode to completely different MemoryRegions. When such registers
437 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
438 * regions overlap wildly. For this reason we cannot clamp the accesses
439 * here.
441 * If the length is small (as is the case for address_space_ldl/stl),
442 * everything works fine. If the incoming length is large, however,
443 * the caller really has to do the clamping through memory_access_size.
445 if (memory_region_is_ram(mr)) {
446 diff = int128_sub(section->size, int128_make64(addr));
447 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
449 return section;
452 /* Called from RCU critical section */
453 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
454 hwaddr *xlat, hwaddr *plen,
455 bool is_write)
457 IOMMUTLBEntry iotlb;
458 MemoryRegionSection *section;
459 MemoryRegion *mr;
461 for (;;) {
462 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
463 section = address_space_translate_internal(d, addr, &addr, plen, true);
464 mr = section->mr;
466 if (!mr->iommu_ops) {
467 break;
470 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
471 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
472 | (addr & iotlb.addr_mask));
473 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
474 if (!(iotlb.perm & (1 << is_write))) {
475 mr = &io_mem_unassigned;
476 break;
479 as = iotlb.target_as;
482 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
483 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
484 *plen = MIN(page, *plen);
487 *xlat = addr;
488 return mr;
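/*
 * Illustrative sketch only (not part of this file): callers must remain in an
 * RCU critical section for as long as they use the returned MemoryRegion,
 * roughly like this (where `gpa` is a hypothetical guest physical address):
 *
 *     hwaddr xlat, len = 4;
 *     MemoryRegion *mr;
 *
 *     rcu_read_lock();
 *     mr = address_space_translate(&address_space_memory, gpa, &xlat, &len,
 *                                  false);
 *     if (memory_access_is_direct(mr, false)) {
 *         // RAM-backed: the data lives at memory_region_get_ram_ptr(mr) + xlat
 *     }
 *     rcu_read_unlock();
 */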
491 /* Called from RCU critical section */
492 MemoryRegionSection *
493 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
494 hwaddr *xlat, hwaddr *plen)
496 MemoryRegionSection *section;
497 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
499 section = address_space_translate_internal(d, addr, xlat, plen, false);
501 assert(!section->mr->iommu_ops);
502 return section;
504 #endif
506 #if !defined(CONFIG_USER_ONLY)
508 static int cpu_common_post_load(void *opaque, int version_id)
510 CPUState *cpu = opaque;
512 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
513 version_id is increased. */
514 cpu->interrupt_request &= ~0x01;
515 tlb_flush(cpu, 1);
517 return 0;
520 static int cpu_common_pre_load(void *opaque)
522 CPUState *cpu = opaque;
524 cpu->exception_index = -1;
526 return 0;
529 static bool cpu_common_exception_index_needed(void *opaque)
531 CPUState *cpu = opaque;
533 return tcg_enabled() && cpu->exception_index != -1;
536 static const VMStateDescription vmstate_cpu_common_exception_index = {
537 .name = "cpu_common/exception_index",
538 .version_id = 1,
539 .minimum_version_id = 1,
540 .needed = cpu_common_exception_index_needed,
541 .fields = (VMStateField[]) {
542 VMSTATE_INT32(exception_index, CPUState),
543 VMSTATE_END_OF_LIST()
547 static bool cpu_common_crash_occurred_needed(void *opaque)
549 CPUState *cpu = opaque;
551 return cpu->crash_occurred;
554 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
555 .name = "cpu_common/crash_occurred",
556 .version_id = 1,
557 .minimum_version_id = 1,
558 .needed = cpu_common_crash_occurred_needed,
559 .fields = (VMStateField[]) {
560 VMSTATE_BOOL(crash_occurred, CPUState),
561 VMSTATE_END_OF_LIST()
565 const VMStateDescription vmstate_cpu_common = {
566 .name = "cpu_common",
567 .version_id = 1,
568 .minimum_version_id = 1,
569 .pre_load = cpu_common_pre_load,
570 .post_load = cpu_common_post_load,
571 .fields = (VMStateField[]) {
572 VMSTATE_UINT32(halted, CPUState),
573 VMSTATE_UINT32(interrupt_request, CPUState),
574 VMSTATE_END_OF_LIST()
576 .subsections = (const VMStateDescription*[]) {
577 &vmstate_cpu_common_exception_index,
578 &vmstate_cpu_common_crash_occurred,
579 NULL
583 #endif
585 CPUState *qemu_get_cpu(int index)
587 CPUState *cpu;
589 CPU_FOREACH(cpu) {
590 if (cpu->cpu_index == index) {
591 return cpu;
595 return NULL;
598 #if !defined(CONFIG_USER_ONLY)
599 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
601 CPUAddressSpace *newas;
603 /* Target code should have set num_ases before calling us */
604 assert(asidx < cpu->num_ases);
606 if (asidx == 0) {
607 /* address space 0 gets the convenience alias */
608 cpu->as = as;
611 /* KVM cannot currently support multiple address spaces. */
612 assert(asidx == 0 || !kvm_enabled());
614 if (!cpu->cpu_ases) {
615 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
618 newas = &cpu->cpu_ases[asidx];
619 newas->cpu = cpu;
620 newas->as = as;
621 if (tcg_enabled()) {
622 newas->tcg_as_listener.commit = tcg_commit;
623 memory_listener_register(&newas->tcg_as_listener, as);
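/*
 * Illustrative sketch only (not part of this file): a target with two address
 * spaces (say, hypothetical secure and non-secure views) would wire them up
 * during CPU realize roughly as:
 *
 *     cpu->num_ases = 2;
 *     cpu_address_space_init(cpu, as_nonsecure, 0);
 *     cpu_address_space_init(cpu, as_secure, 1);
 *
 * where as_nonsecure/as_secure are AddressSpaces created by the target code;
 * note the asserts above: num_ases must be set first, and only index 0 is
 * allowed when KVM is enabled.
 */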
627 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
629 /* Return the AddressSpace corresponding to the specified index */
630 return cpu->cpu_ases[asidx].as;
632 #endif
634 void cpu_exec_unrealizefn(CPUState *cpu)
636 CPUClass *cc = CPU_GET_CLASS(cpu);
638 cpu_list_remove(cpu);
640 if (cc->vmsd != NULL) {
641 vmstate_unregister(NULL, cc->vmsd, cpu);
643 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
644 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
648 void cpu_exec_initfn(CPUState *cpu)
650 #ifdef TARGET_WORDS_BIGENDIAN
651 cpu->bigendian = true;
652 #else
653 cpu->bigendian = false;
654 #endif
655 cpu->as = NULL;
656 cpu->num_ases = 0;
658 #ifndef CONFIG_USER_ONLY
659 cpu->thread_id = qemu_get_thread_id();
661 /* This is a softmmu CPU object, so create a property for it
662 * so users can wire up its memory. (This can't go in qom/cpu.c
663 * because that file is compiled only once for both user-mode
664 * and system builds.) The default if no link is set up is to use
665 * the system address space.
667 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
668 (Object **)&cpu->memory,
669 qdev_prop_allow_set_link_before_realize,
670 OBJ_PROP_LINK_UNREF_ON_RELEASE,
671 &error_abort);
672 cpu->memory = system_memory;
673 object_ref(OBJECT(cpu->memory));
674 #endif
677 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
679 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
681 cpu_list_add(cpu);
683 #ifndef CONFIG_USER_ONLY
684 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
685 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
687 if (cc->vmsd != NULL) {
688 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
690 #endif
693 #if defined(CONFIG_USER_ONLY)
694 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
696 mmap_lock();
697 tb_lock();
698 tb_invalidate_phys_page_range(pc, pc + 1, 0);
699 tb_unlock();
700 mmap_unlock();
702 #else
703 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
705 MemTxAttrs attrs;
706 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
707 int asidx = cpu_asidx_from_attrs(cpu, attrs);
708 if (phys != -1) {
709 /* Locks grabbed by tb_invalidate_phys_addr */
710 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
711 phys | (pc & ~TARGET_PAGE_MASK));
714 #endif
716 #if defined(CONFIG_USER_ONLY)
717 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
722 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
723 int flags)
725 return -ENOSYS;
728 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
732 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
733 int flags, CPUWatchpoint **watchpoint)
735 return -ENOSYS;
737 #else
738 /* Add a watchpoint. */
739 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
740 int flags, CPUWatchpoint **watchpoint)
742 CPUWatchpoint *wp;
744 /* forbid ranges which are empty or run off the end of the address space */
745 if (len == 0 || (addr + len - 1) < addr) {
746 error_report("tried to set invalid watchpoint at %"
747 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
748 return -EINVAL;
750 wp = g_malloc(sizeof(*wp));
752 wp->vaddr = addr;
753 wp->len = len;
754 wp->flags = flags;
756 /* keep all GDB-injected watchpoints in front */
757 if (flags & BP_GDB) {
758 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
759 } else {
760 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
763 tlb_flush_page(cpu, addr);
765 if (watchpoint)
766 *watchpoint = wp;
767 return 0;
770 /* Remove a specific watchpoint. */
771 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
772 int flags)
774 CPUWatchpoint *wp;
776 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
777 if (addr == wp->vaddr && len == wp->len
778 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
779 cpu_watchpoint_remove_by_ref(cpu, wp);
780 return 0;
783 return -ENOENT;
786 /* Remove a specific watchpoint by reference. */
787 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
789 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
791 tlb_flush_page(cpu, watchpoint->vaddr);
793 g_free(watchpoint);
796 /* Remove all matching watchpoints. */
797 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
799 CPUWatchpoint *wp, *next;
801 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
802 if (wp->flags & mask) {
803 cpu_watchpoint_remove_by_ref(cpu, wp);
808 /* Return true if this watchpoint address matches the specified
809 * access (ie the address range covered by the watchpoint overlaps
810 * partially or completely with the address range covered by the
811 * access).
813 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
814 vaddr addr,
815 vaddr len)
817 /* We know the lengths are non-zero, but a little caution is
818 * required to avoid errors in the case where the range ends
819 * exactly at the top of the address space and so addr + len
820 * wraps round to zero.
822 vaddr wpend = wp->vaddr + wp->len - 1;
823 vaddr addrend = addr + len - 1;
825 return !(addr > wpend || wp->vaddr > addrend);
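/*
 * Worked example (illustrative): a watchpoint at vaddr 0xfffffffffffffff8 with
 * len 8 has wpend 0xffffffffffffffff; an access at addr 0xfffffffffffffffc
 * with len 4 has addrend 0xffffffffffffffff. Neither "addr > wpend" nor
 * "wp->vaddr > addrend" holds, so the ranges overlap. Comparing inclusive end
 * addresses is what avoids the wrap to zero that "addr + len" would produce
 * in this case.
 */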
828 #endif
830 /* Add a breakpoint. */
831 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
832 CPUBreakpoint **breakpoint)
834 CPUBreakpoint *bp;
836 bp = g_malloc(sizeof(*bp));
838 bp->pc = pc;
839 bp->flags = flags;
841 /* keep all GDB-injected breakpoints in front */
842 if (flags & BP_GDB) {
843 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
844 } else {
845 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
848 breakpoint_invalidate(cpu, pc);
850 if (breakpoint) {
851 *breakpoint = bp;
853 return 0;
856 /* Remove a specific breakpoint. */
857 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
859 CPUBreakpoint *bp;
861 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
862 if (bp->pc == pc && bp->flags == flags) {
863 cpu_breakpoint_remove_by_ref(cpu, bp);
864 return 0;
867 return -ENOENT;
870 /* Remove a specific breakpoint by reference. */
871 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
873 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
875 breakpoint_invalidate(cpu, breakpoint->pc);
877 g_free(breakpoint);
880 /* Remove all matching breakpoints. */
881 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
883 CPUBreakpoint *bp, *next;
885 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
886 if (bp->flags & mask) {
887 cpu_breakpoint_remove_by_ref(cpu, bp);
892 /* enable or disable single step mode. EXCP_DEBUG is returned by the
893 CPU loop after each instruction */
894 void cpu_single_step(CPUState *cpu, int enabled)
896 if (cpu->singlestep_enabled != enabled) {
897 cpu->singlestep_enabled = enabled;
898 if (kvm_enabled()) {
899 kvm_update_guest_debug(cpu, 0);
900 } else {
901 /* must flush all the translated code to avoid inconsistencies */
902 /* XXX: only flush what is necessary */
903 tb_flush(cpu);
908 void QEMU_NORETURN cpu_abort(CPUState *cpu, const char *fmt, ...)
910 va_list ap;
911 va_list ap2;
913 va_start(ap, fmt);
914 va_copy(ap2, ap);
915 fprintf(stderr, "qemu: fatal: ");
916 vfprintf(stderr, fmt, ap);
917 fprintf(stderr, "\n");
918 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
919 if (qemu_log_separate()) {
920 qemu_log_lock();
921 qemu_log("qemu: fatal: ");
922 qemu_log_vprintf(fmt, ap2);
923 qemu_log("\n");
924 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
925 qemu_log_flush();
926 qemu_log_unlock();
927 qemu_log_close();
929 va_end(ap2);
930 va_end(ap);
931 replay_finish();
932 #if defined(CONFIG_USER_ONLY)
934 struct sigaction act;
935 sigfillset(&act.sa_mask);
936 act.sa_handler = SIG_DFL;
937 sigaction(SIGABRT, &act, NULL);
939 #endif
940 abort();
943 #if !defined(CONFIG_USER_ONLY)
944 /* Called from RCU critical section */
945 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
947 RAMBlock *block;
949 block = atomic_rcu_read(&ram_list.mru_block);
950 if (block && addr - block->offset < block->max_length) {
951 return block;
953 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
954 if (addr - block->offset < block->max_length) {
955 goto found;
959 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
960 abort();
962 found:
963 /* It is safe to write mru_block outside the iothread lock. This
964 * is what happens:
966 * mru_block = xxx
967 * rcu_read_unlock()
968 * xxx removed from list
969 * rcu_read_lock()
970 * read mru_block
971 * mru_block = NULL;
972 * call_rcu(reclaim_ramblock, xxx);
973 * rcu_read_unlock()
975 * atomic_rcu_set is not needed here. The block was already published
976 * when it was placed into the list. Here we're just making an extra
977 * copy of the pointer.
979 ram_list.mru_block = block;
980 return block;
983 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
985 CPUState *cpu;
986 ram_addr_t start1;
987 RAMBlock *block;
988 ram_addr_t end;
990 end = TARGET_PAGE_ALIGN(start + length);
991 start &= TARGET_PAGE_MASK;
993 rcu_read_lock();
994 block = qemu_get_ram_block(start);
995 assert(block == qemu_get_ram_block(end - 1));
996 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
997 CPU_FOREACH(cpu) {
998 tlb_reset_dirty(cpu, start1, length);
1000 rcu_read_unlock();
1003 /* Note: start and end must be within the same ram block. */
1004 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
1005 ram_addr_t length,
1006 unsigned client)
1008 DirtyMemoryBlocks *blocks;
1009 unsigned long end, page;
1010 bool dirty = false;
1012 if (length == 0) {
1013 return false;
1016 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1017 page = start >> TARGET_PAGE_BITS;
1019 rcu_read_lock();
1021 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1023 while (page < end) {
1024 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1025 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1026 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1028 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1029 offset, num);
1030 page += num;
1033 rcu_read_unlock();
1035 if (dirty && tcg_enabled()) {
1036 tlb_reset_dirty_range_all(start, length);
1039 return dirty;
1042 /* Called from RCU critical section */
1043 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1044 MemoryRegionSection *section,
1045 target_ulong vaddr,
1046 hwaddr paddr, hwaddr xlat,
1047 int prot,
1048 target_ulong *address)
1050 hwaddr iotlb;
1051 CPUWatchpoint *wp;
1053 if (memory_region_is_ram(section->mr)) {
1054 /* Normal RAM. */
1055 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1056 if (!section->readonly) {
1057 iotlb |= PHYS_SECTION_NOTDIRTY;
1058 } else {
1059 iotlb |= PHYS_SECTION_ROM;
1061 } else {
1062 AddressSpaceDispatch *d;
1064 d = atomic_rcu_read(&section->address_space->dispatch);
1065 iotlb = section - d->map.sections;
1066 iotlb += xlat;
1069 /* Make accesses to pages with watchpoints go via the
1070 watchpoint trap routines. */
1071 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1072 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1073 /* Avoid trapping reads of pages with a write breakpoint. */
1074 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1075 iotlb = PHYS_SECTION_WATCH + paddr;
1076 *address |= TLB_MMIO;
1077 break;
1082 return iotlb;
1084 #endif /* defined(CONFIG_USER_ONLY) */
1086 #if !defined(CONFIG_USER_ONLY)
1088 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1089 uint16_t section);
1090 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1092 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1093 qemu_anon_ram_alloc;
 1096  * Set a custom physical guest memory allocator.
1097 * Accelerators with unusual needs may need this. Hopefully, we can
1098 * get rid of it eventually.
1100 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1102 phys_mem_alloc = alloc;
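/*
 * Illustrative sketch only (not part of this file): an accelerator with
 * special allocation needs could install its own allocator, e.g. a
 * hypothetical backend that pins guest RAM with its kernel driver:
 *
 *     static void *my_accel_alloc_ram(size_t size, uint64_t *align)
 *     {
 *         void *ptr = qemu_anon_ram_alloc(size, align);
 *         // pin/register `ptr` with the accelerator's kernel driver here
 *         return ptr;
 *     }
 *
 *     ...
 *     phys_mem_set_alloc(my_accel_alloc_ram);
 */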
1105 static uint16_t phys_section_add(PhysPageMap *map,
1106 MemoryRegionSection *section)
1108 /* The physical section number is ORed with a page-aligned
1109 * pointer to produce the iotlb entries. Thus it should
1110 * never overflow into the page-aligned value.
1112 assert(map->sections_nb < TARGET_PAGE_SIZE);
1114 if (map->sections_nb == map->sections_nb_alloc) {
1115 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1116 map->sections = g_renew(MemoryRegionSection, map->sections,
1117 map->sections_nb_alloc);
1119 map->sections[map->sections_nb] = *section;
1120 memory_region_ref(section->mr);
1121 return map->sections_nb++;
1124 static void phys_section_destroy(MemoryRegion *mr)
1126 bool have_sub_page = mr->subpage;
1128 memory_region_unref(mr);
1130 if (have_sub_page) {
1131 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1132 object_unref(OBJECT(&subpage->iomem));
1133 g_free(subpage);
1137 static void phys_sections_free(PhysPageMap *map)
1139 while (map->sections_nb > 0) {
1140 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1141 phys_section_destroy(section->mr);
1143 g_free(map->sections);
1144 g_free(map->nodes);
1147 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1149 subpage_t *subpage;
1150 hwaddr base = section->offset_within_address_space
1151 & TARGET_PAGE_MASK;
1152 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1153 d->map.nodes, d->map.sections);
1154 MemoryRegionSection subsection = {
1155 .offset_within_address_space = base,
1156 .size = int128_make64(TARGET_PAGE_SIZE),
1158 hwaddr start, end;
1160 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1162 if (!(existing->mr->subpage)) {
1163 subpage = subpage_init(d->as, base);
1164 subsection.address_space = d->as;
1165 subsection.mr = &subpage->iomem;
1166 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1167 phys_section_add(&d->map, &subsection));
1168 } else {
1169 subpage = container_of(existing->mr, subpage_t, iomem);
1171 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1172 end = start + int128_get64(section->size) - 1;
1173 subpage_register(subpage, start, end,
1174 phys_section_add(&d->map, section));
1178 static void register_multipage(AddressSpaceDispatch *d,
1179 MemoryRegionSection *section)
1181 hwaddr start_addr = section->offset_within_address_space;
1182 uint16_t section_index = phys_section_add(&d->map, section);
1183 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1184 TARGET_PAGE_BITS));
1186 assert(num_pages);
1187 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1190 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1192 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1193 AddressSpaceDispatch *d = as->next_dispatch;
1194 MemoryRegionSection now = *section, remain = *section;
1195 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1197 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1198 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1199 - now.offset_within_address_space;
1201 now.size = int128_min(int128_make64(left), now.size);
1202 register_subpage(d, &now);
1203 } else {
1204 now.size = int128_zero();
1206 while (int128_ne(remain.size, now.size)) {
1207 remain.size = int128_sub(remain.size, now.size);
1208 remain.offset_within_address_space += int128_get64(now.size);
1209 remain.offset_within_region += int128_get64(now.size);
1210 now = remain;
1211 if (int128_lt(remain.size, page_size)) {
1212 register_subpage(d, &now);
1213 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1214 now.size = page_size;
1215 register_subpage(d, &now);
1216 } else {
1217 now.size = int128_and(now.size, int128_neg(page_size));
1218 register_multipage(d, &now);
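/*
 * Worked example (illustrative, assuming 4 KiB target pages): a section with
 * offset_within_address_space 0x1800 and size 0x3000 is registered in three
 * pieces:
 *   [0x1800, 0x1fff]  unaligned head   -> register_subpage()
 *   [0x2000, 0x3fff]  two whole pages  -> register_multipage()
 *   [0x4000, 0x47ff]  partial tail     -> register_subpage()
 */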
1223 void qemu_flush_coalesced_mmio_buffer(void)
1225 if (kvm_enabled())
1226 kvm_flush_coalesced_mmio_buffer();
1229 void qemu_mutex_lock_ramlist(void)
1231 qemu_mutex_lock(&ram_list.mutex);
1234 void qemu_mutex_unlock_ramlist(void)
1236 qemu_mutex_unlock(&ram_list.mutex);
1239 #ifdef __linux__
1240 static int64_t get_file_size(int fd)
1242 int64_t size = lseek(fd, 0, SEEK_END);
1243 if (size < 0) {
1244 return -errno;
1246 return size;
1249 static void *file_ram_alloc(RAMBlock *block,
1250 ram_addr_t memory,
1251 const char *path,
1252 Error **errp)
1254 bool unlink_on_error = false;
1255 char *filename;
1256 char *sanitized_name;
1257 char *c;
1258 void * volatile area = MAP_FAILED;
1259 int fd = -1;
1260 int64_t file_size;
1262 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1263 error_setg(errp,
1264 "host lacks kvm mmu notifiers, -mem-path unsupported");
1265 return NULL;
1268 for (;;) {
1269 fd = open(path, O_RDWR);
1270 if (fd >= 0) {
1271 /* @path names an existing file, use it */
1272 break;
1274 if (errno == ENOENT) {
1275 /* @path names a file that doesn't exist, create it */
1276 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1277 if (fd >= 0) {
1278 unlink_on_error = true;
1279 break;
1281 } else if (errno == EISDIR) {
1282 /* @path names a directory, create a file there */
1283 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1284 sanitized_name = g_strdup(memory_region_name(block->mr));
1285 for (c = sanitized_name; *c != '\0'; c++) {
1286 if (*c == '/') {
1287 *c = '_';
1291 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1292 sanitized_name);
1293 g_free(sanitized_name);
1295 fd = mkstemp(filename);
1296 if (fd >= 0) {
1297 unlink(filename);
1298 g_free(filename);
1299 break;
1301 g_free(filename);
1303 if (errno != EEXIST && errno != EINTR) {
1304 error_setg_errno(errp, errno,
1305 "can't open backing store %s for guest RAM",
1306 path);
1307 goto error;
1310 * Try again on EINTR and EEXIST. The latter happens when
1311 * something else creates the file between our two open().
1315 block->page_size = qemu_fd_getpagesize(fd);
1316 block->mr->align = block->page_size;
1317 #if defined(__s390x__)
1318 if (kvm_enabled()) {
1319 block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1321 #endif
1323 file_size = get_file_size(fd);
1325 if (memory < block->page_size) {
1326 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1327 "or larger than page size 0x%zx",
1328 memory, block->page_size);
1329 goto error;
1332 if (file_size > 0 && file_size < memory) {
1333 error_setg(errp, "backing store %s size 0x%" PRIx64
1334 " does not match 'size' option 0x" RAM_ADDR_FMT,
1335 path, file_size, memory);
1336 goto error;
1339 memory = ROUND_UP(memory, block->page_size);
1342 * ftruncate is not supported by hugetlbfs in older
1343 * hosts, so don't bother bailing out on errors.
1344 * If anything goes wrong with it under other filesystems,
1345 * mmap will fail.
1347 * Do not truncate the non-empty backend file to avoid corrupting
1348 * the existing data in the file. Disabling shrinking is not
1349 * enough. For example, the current vNVDIMM implementation stores
1350 * the guest NVDIMM labels at the end of the backend file. If the
1351 * backend file is later extended, QEMU will not be able to find
1352 * those labels. Therefore, extending the non-empty backend file
1353 * is disabled as well.
1355 if (!file_size && ftruncate(fd, memory)) {
1356 perror("ftruncate");
1359 area = qemu_ram_mmap(fd, memory, block->mr->align,
1360 block->flags & RAM_SHARED);
1361 if (area == MAP_FAILED) {
1362 error_setg_errno(errp, errno,
1363 "unable to map backing store for guest RAM");
1364 goto error;
1367 if (mem_prealloc) {
1368 os_mem_prealloc(fd, area, memory, errp);
1369 if (errp && *errp) {
1370 goto error;
1374 block->fd = fd;
1375 return area;
1377 error:
1378 if (area != MAP_FAILED) {
1379 qemu_ram_munmap(area, memory);
1381 if (unlink_on_error) {
1382 unlink(path);
1384 if (fd != -1) {
1385 close(fd);
1387 return NULL;
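/*
 * Note (illustrative): this path is reached for file-backed guest RAM, for
 * example when the guest is started with something like
 *
 *     qemu-system-x86_64 -m 4096 -mem-path /dev/hugepages -mem-prealloc ...
 *
 * where -mem-prealloc additionally triggers the os_mem_prealloc() call above.
 */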
1389 #endif
1391 /* Called with the ramlist lock held. */
1392 static ram_addr_t find_ram_offset(ram_addr_t size)
1394 RAMBlock *block, *next_block;
1395 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1397 assert(size != 0); /* it would hand out same offset multiple times */
1399 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1400 return 0;
1403 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1404 ram_addr_t end, next = RAM_ADDR_MAX;
1406 end = block->offset + block->max_length;
1408 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1409 if (next_block->offset >= end) {
1410 next = MIN(next, next_block->offset);
1413 if (next - end >= size && next - end < mingap) {
1414 offset = end;
1415 mingap = next - end;
1419 if (offset == RAM_ADDR_MAX) {
1420 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1421 (uint64_t)size);
1422 abort();
1425 return offset;
1428 ram_addr_t last_ram_offset(void)
1430 RAMBlock *block;
1431 ram_addr_t last = 0;
1433 rcu_read_lock();
1434 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1435 last = MAX(last, block->offset + block->max_length);
1437 rcu_read_unlock();
1438 return last;
1441 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1443 int ret;
 1445     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1446 if (!machine_dump_guest_core(current_machine)) {
1447 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1448 if (ret) {
1449 perror("qemu_madvise");
1450 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1451 "but dump_guest_core=off specified\n");
1456 const char *qemu_ram_get_idstr(RAMBlock *rb)
1458 return rb->idstr;
1461 /* Called with iothread lock held. */
1462 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1464 RAMBlock *block;
1466 assert(new_block);
1467 assert(!new_block->idstr[0]);
1469 if (dev) {
1470 char *id = qdev_get_dev_path(dev);
1471 if (id) {
1472 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1473 g_free(id);
1476 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1478 rcu_read_lock();
1479 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1480 if (block != new_block &&
1481 !strcmp(block->idstr, new_block->idstr)) {
1482 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1483 new_block->idstr);
1484 abort();
1487 rcu_read_unlock();
1490 /* Called with iothread lock held. */
1491 void qemu_ram_unset_idstr(RAMBlock *block)
1493 /* FIXME: arch_init.c assumes that this is not called throughout
1494 * migration. Ignore the problem since hot-unplug during migration
1495 * does not work anyway.
1497 if (block) {
1498 memset(block->idstr, 0, sizeof(block->idstr));
1502 size_t qemu_ram_pagesize(RAMBlock *rb)
1504 return rb->page_size;
1507 static int memory_try_enable_merging(void *addr, size_t len)
1509 if (!machine_mem_merge(current_machine)) {
1510 /* disabled by the user */
1511 return 0;
1514 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1517 /* Only legal before guest might have detected the memory size: e.g. on
1518 * incoming migration, or right after reset.
 1520  * As the memory core doesn't know how the memory is accessed, it is up to
 1521  * the resize callback to update device state and/or add assertions to detect
1522 * misuse, if necessary.
1524 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1526 assert(block);
1528 newsize = HOST_PAGE_ALIGN(newsize);
1530 if (block->used_length == newsize) {
1531 return 0;
1534 if (!(block->flags & RAM_RESIZEABLE)) {
1535 error_setg_errno(errp, EINVAL,
1536 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1537 " in != 0x" RAM_ADDR_FMT, block->idstr,
1538 newsize, block->used_length);
1539 return -EINVAL;
1542 if (block->max_length < newsize) {
1543 error_setg_errno(errp, EINVAL,
1544 "Length too large: %s: 0x" RAM_ADDR_FMT
1545 " > 0x" RAM_ADDR_FMT, block->idstr,
1546 newsize, block->max_length);
1547 return -EINVAL;
1550 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1551 block->used_length = newsize;
1552 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1553 DIRTY_CLIENTS_ALL);
1554 memory_region_set_size(block->mr, newsize);
1555 if (block->resized) {
1556 block->resized(block->idstr, newsize, block->host);
1558 return 0;
1561 /* Called with ram_list.mutex held */
1562 static void dirty_memory_extend(ram_addr_t old_ram_size,
1563 ram_addr_t new_ram_size)
1565 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1566 DIRTY_MEMORY_BLOCK_SIZE);
1567 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1568 DIRTY_MEMORY_BLOCK_SIZE);
1569 int i;
1571 /* Only need to extend if block count increased */
1572 if (new_num_blocks <= old_num_blocks) {
1573 return;
1576 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1577 DirtyMemoryBlocks *old_blocks;
1578 DirtyMemoryBlocks *new_blocks;
1579 int j;
1581 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1582 new_blocks = g_malloc(sizeof(*new_blocks) +
1583 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1585 if (old_num_blocks) {
1586 memcpy(new_blocks->blocks, old_blocks->blocks,
1587 old_num_blocks * sizeof(old_blocks->blocks[0]));
1590 for (j = old_num_blocks; j < new_num_blocks; j++) {
1591 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1594 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1596 if (old_blocks) {
1597 g_free_rcu(old_blocks, rcu);
1602 static void ram_block_add(RAMBlock *new_block, Error **errp)
1604 RAMBlock *block;
1605 RAMBlock *last_block = NULL;
1606 ram_addr_t old_ram_size, new_ram_size;
1607 Error *err = NULL;
1609 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1611 qemu_mutex_lock_ramlist();
1612 new_block->offset = find_ram_offset(new_block->max_length);
1614 if (!new_block->host) {
1615 if (xen_enabled()) {
1616 xen_ram_alloc(new_block->offset, new_block->max_length,
1617 new_block->mr, &err);
1618 if (err) {
1619 error_propagate(errp, err);
1620 qemu_mutex_unlock_ramlist();
1621 return;
1623 } else {
1624 new_block->host = phys_mem_alloc(new_block->max_length,
1625 &new_block->mr->align);
 1627              * With HAX, QEMU allocates the virtual address space and the HAX kernel
 1628              * populates it with physical memory. Currently there is no paging, so the
 1629              * user should make sure there is enough free memory in advance.
1631 if (hax_enabled()) {
1632 int ret;
1633 ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host,
1634 new_block->max_length);
1635 if (ret < 0) {
1636 error_setg(errp, "Hax failed to populate ram");
1637 return;
1641 if (!new_block->host) {
1642 error_setg_errno(errp, errno,
1643 "cannot set up guest memory '%s'",
1644 memory_region_name(new_block->mr));
1645 qemu_mutex_unlock_ramlist();
1646 return;
1648 memory_try_enable_merging(new_block->host, new_block->max_length);
1652 new_ram_size = MAX(old_ram_size,
1653 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1654 if (new_ram_size > old_ram_size) {
1655 migration_bitmap_extend(old_ram_size, new_ram_size);
1656 dirty_memory_extend(old_ram_size, new_ram_size);
1658 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1659 * QLIST (which has an RCU-friendly variant) does not have insertion at
1660 * tail, so save the last element in last_block.
1662 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1663 last_block = block;
1664 if (block->max_length < new_block->max_length) {
1665 break;
1668 if (block) {
1669 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1670 } else if (last_block) {
1671 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1672 } else { /* list is empty */
1673 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1675 ram_list.mru_block = NULL;
1677 /* Write list before version */
1678 smp_wmb();
1679 ram_list.version++;
1680 qemu_mutex_unlock_ramlist();
1682 cpu_physical_memory_set_dirty_range(new_block->offset,
1683 new_block->used_length,
1684 DIRTY_CLIENTS_ALL);
1686 if (new_block->host) {
1687 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1688 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1689 /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1690 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1694 #ifdef __linux__
1695 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1696 bool share, const char *mem_path,
1697 Error **errp)
1699 RAMBlock *new_block;
1700 Error *local_err = NULL;
1702 if (xen_enabled()) {
1703 error_setg(errp, "-mem-path not supported with Xen");
1704 return NULL;
1707 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1709 * file_ram_alloc() needs to allocate just like
1710 * phys_mem_alloc, but we haven't bothered to provide
1711 * a hook there.
1713 error_setg(errp,
1714 "-mem-path not supported with this accelerator");
1715 return NULL;
1718 size = HOST_PAGE_ALIGN(size);
1719 new_block = g_malloc0(sizeof(*new_block));
1720 new_block->mr = mr;
1721 new_block->used_length = size;
1722 new_block->max_length = size;
1723 new_block->flags = share ? RAM_SHARED : 0;
1724 new_block->host = file_ram_alloc(new_block, size,
1725 mem_path, errp);
1726 if (!new_block->host) {
1727 g_free(new_block);
1728 return NULL;
1731 ram_block_add(new_block, &local_err);
1732 if (local_err) {
1733 g_free(new_block);
1734 error_propagate(errp, local_err);
1735 return NULL;
1737 return new_block;
1739 #endif
1741 static
1742 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1743 void (*resized)(const char*,
1744 uint64_t length,
1745 void *host),
1746 void *host, bool resizeable,
1747 MemoryRegion *mr, Error **errp)
1749 RAMBlock *new_block;
1750 Error *local_err = NULL;
1752 size = HOST_PAGE_ALIGN(size);
1753 max_size = HOST_PAGE_ALIGN(max_size);
1754 new_block = g_malloc0(sizeof(*new_block));
1755 new_block->mr = mr;
1756 new_block->resized = resized;
1757 new_block->used_length = size;
1758 new_block->max_length = max_size;
1759 assert(max_size >= size);
1760 new_block->fd = -1;
1761 new_block->page_size = getpagesize();
1762 new_block->host = host;
1763 if (host) {
1764 new_block->flags |= RAM_PREALLOC;
1766 if (resizeable) {
1767 new_block->flags |= RAM_RESIZEABLE;
1769 ram_block_add(new_block, &local_err);
1770 if (local_err) {
1771 g_free(new_block);
1772 error_propagate(errp, local_err);
1773 return NULL;
1775 return new_block;
1778 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1779 MemoryRegion *mr, Error **errp)
1781 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1784 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1786 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1789 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1790 void (*resized)(const char*,
1791 uint64_t length,
1792 void *host),
1793 MemoryRegion *mr, Error **errp)
1795 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
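/*
 * Illustrative sketch only (not part of this file): a device that wants RAM
 * whose used_length can grow later might do, with hypothetical names:
 *
 *     static void my_dev_resized(const char *id, uint64_t new_len, void *host)
 *     {
 *         // update device-internal bookkeeping for the new size
 *     }
 *
 *     rb = qemu_ram_alloc_resizeable(16 * 1024 * 1024, 64 * 1024 * 1024,
 *                                    my_dev_resized, mr, &error_fatal);
 *     ...
 *     // later, e.g. on incoming migration, before the guest sees the RAM:
 *     qemu_ram_resize(rb, 32 * 1024 * 1024, &error_fatal);
 */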
1798 static void reclaim_ramblock(RAMBlock *block)
1800 if (block->flags & RAM_PREALLOC) {
1802 } else if (xen_enabled()) {
1803 xen_invalidate_map_cache_entry(block->host);
1804 #ifndef _WIN32
1805 } else if (block->fd >= 0) {
1806 qemu_ram_munmap(block->host, block->max_length);
1807 close(block->fd);
1808 #endif
1809 } else {
1810 qemu_anon_ram_free(block->host, block->max_length);
1812 g_free(block);
1815 void qemu_ram_free(RAMBlock *block)
1817 if (!block) {
1818 return;
1821 qemu_mutex_lock_ramlist();
1822 QLIST_REMOVE_RCU(block, next);
1823 ram_list.mru_block = NULL;
1824 /* Write list before version */
1825 smp_wmb();
1826 ram_list.version++;
1827 call_rcu(block, reclaim_ramblock, rcu);
1828 qemu_mutex_unlock_ramlist();
1831 #ifndef _WIN32
1832 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1834 RAMBlock *block;
1835 ram_addr_t offset;
1836 int flags;
1837 void *area, *vaddr;
1839 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1840 offset = addr - block->offset;
1841 if (offset < block->max_length) {
1842 vaddr = ramblock_ptr(block, offset);
1843 if (block->flags & RAM_PREALLOC) {
1845 } else if (xen_enabled()) {
1846 abort();
1847 } else {
1848 flags = MAP_FIXED;
1849 if (block->fd >= 0) {
1850 flags |= (block->flags & RAM_SHARED ?
1851 MAP_SHARED : MAP_PRIVATE);
1852 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1853 flags, block->fd, offset);
1854 } else {
1856 * Remap needs to match alloc. Accelerators that
1857 * set phys_mem_alloc never remap. If they did,
1858 * we'd need a remap hook here.
1860 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1862 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1863 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1864 flags, -1, 0);
1866 if (area != vaddr) {
1867 fprintf(stderr, "Could not remap addr: "
1868 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1869 length, addr);
1870 exit(1);
1872 memory_try_enable_merging(vaddr, length);
1873 qemu_ram_setup_dump(vaddr, length);
1878 #endif /* !_WIN32 */
1880 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1881 * This should not be used for general purpose DMA. Use address_space_map
1882 * or address_space_rw instead. For local memory (e.g. video ram) that the
1883 * device owns, use memory_region_get_ram_ptr.
1885 * Called within RCU critical section.
1887 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1889 RAMBlock *block = ram_block;
1891 if (block == NULL) {
1892 block = qemu_get_ram_block(addr);
1893 addr -= block->offset;
1896 if (xen_enabled() && block->host == NULL) {
1897 /* We need to check if the requested address is in the RAM
1898 * because we don't want to map the entire memory in QEMU.
1899 * In that case just map until the end of the page.
1901 if (block->offset == 0) {
1902 return xen_map_cache(addr, 0, 0);
1905 block->host = xen_map_cache(block->offset, block->max_length, 1);
1907 return ramblock_ptr(block, addr);
1910 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1911 * but takes a size argument.
1913 * Called within RCU critical section.
1915 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1916 hwaddr *size)
1918 RAMBlock *block = ram_block;
1919 if (*size == 0) {
1920 return NULL;
1923 if (block == NULL) {
1924 block = qemu_get_ram_block(addr);
1925 addr -= block->offset;
1927 *size = MIN(*size, block->max_length - addr);
1929 if (xen_enabled() && block->host == NULL) {
1930 /* We need to check if the requested address is in the RAM
1931 * because we don't want to map the entire memory in QEMU.
1932 * In that case just map the requested area.
1934 if (block->offset == 0) {
1935 return xen_map_cache(addr, *size, 1);
1938 block->host = xen_map_cache(block->offset, block->max_length, 1);
1941 return ramblock_ptr(block, addr);
1945 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1946 * in that RAMBlock.
1948 * ptr: Host pointer to look up
1949 * round_offset: If true round the result offset down to a page boundary
1951 * *offset: set to result offset within the RAMBlock
1953 * Returns: RAMBlock (or NULL if not found)
1955 * By the time this function returns, the returned pointer is not protected
1956 * by RCU anymore. If the caller is not within an RCU critical section and
1957 * does not hold the iothread lock, it must have other means of protecting the
1958 * pointer, such as a reference to the region that includes the incoming
1959 * ram_addr_t.
1961 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1962 ram_addr_t *offset)
1964 RAMBlock *block;
1965 uint8_t *host = ptr;
1967 if (xen_enabled()) {
1968 ram_addr_t ram_addr;
1969 rcu_read_lock();
1970 ram_addr = xen_ram_addr_from_mapcache(ptr);
1971 block = qemu_get_ram_block(ram_addr);
1972 if (block) {
1973 *offset = ram_addr - block->offset;
1975 rcu_read_unlock();
1976 return block;
1979 rcu_read_lock();
1980 block = atomic_rcu_read(&ram_list.mru_block);
1981 if (block && block->host && host - block->host < block->max_length) {
1982 goto found;
1985 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 1986         /* This case appears when the block is not mapped. */
1987 if (block->host == NULL) {
1988 continue;
1990 if (host - block->host < block->max_length) {
1991 goto found;
1995 rcu_read_unlock();
1996 return NULL;
1998 found:
1999 *offset = (host - block->host);
2000 if (round_offset) {
2001 *offset &= TARGET_PAGE_MASK;
2003 rcu_read_unlock();
2004 return block;
2008 * Finds the named RAMBlock
2010 * name: The name of RAMBlock to find
2012 * Returns: RAMBlock (or NULL if not found)
2014 RAMBlock *qemu_ram_block_by_name(const char *name)
2016 RAMBlock *block;
2018 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2019 if (!strcmp(name, block->idstr)) {
2020 return block;
2024 return NULL;
2027 /* Some of the softmmu routines need to translate from a host pointer
2028 (typically a TLB entry) back to a ram offset. */
2029 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2031 RAMBlock *block;
2032 ram_addr_t offset;
2034 block = qemu_ram_block_from_host(ptr, false, &offset);
2035 if (!block) {
2036 return RAM_ADDR_INVALID;
2039 return block->offset + offset;
2042 /* Called within RCU critical section. */
2043 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2044 uint64_t val, unsigned size)
2046 bool locked = false;
2048 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2049 locked = true;
2050 tb_lock();
2051 tb_invalidate_phys_page_fast(ram_addr, size);
2053 switch (size) {
2054 case 1:
2055 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2056 break;
2057 case 2:
2058 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2059 break;
2060 case 4:
2061 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2062 break;
2063 default:
2064 abort();
2067 if (locked) {
2068 tb_unlock();
2071 /* Set both VGA and migration bits for simplicity and to remove
2072 * the notdirty callback faster.
2074 cpu_physical_memory_set_dirty_range(ram_addr, size,
2075 DIRTY_CLIENTS_NOCODE);
2076 /* we remove the notdirty callback only if the code has been
2077 flushed */
2078 if (!cpu_physical_memory_is_clean(ram_addr)) {
2079 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2083 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2084 unsigned size, bool is_write)
2086 return is_write;
2089 static const MemoryRegionOps notdirty_mem_ops = {
2090 .write = notdirty_mem_write,
2091 .valid.accepts = notdirty_mem_accepts,
2092 .endianness = DEVICE_NATIVE_ENDIAN,
2095 /* Generate a debug exception if a watchpoint has been hit. */
2096 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2098 CPUState *cpu = current_cpu;
2099 CPUClass *cc = CPU_GET_CLASS(cpu);
2100 CPUArchState *env = cpu->env_ptr;
2101 target_ulong pc, cs_base;
2102 target_ulong vaddr;
2103 CPUWatchpoint *wp;
2104 uint32_t cpu_flags;
2106 if (cpu->watchpoint_hit) {
2107 /* We re-entered the check after replacing the TB. Now raise
 2108          * the debug interrupt so that it will trigger after the
2109 * current instruction. */
2110 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2111 return;
2113 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2114 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2115 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2116 && (wp->flags & flags)) {
2117 if (flags == BP_MEM_READ) {
2118 wp->flags |= BP_WATCHPOINT_HIT_READ;
2119 } else {
2120 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2122 wp->hitaddr = vaddr;
2123 wp->hitattrs = attrs;
2124 if (!cpu->watchpoint_hit) {
2125 if (wp->flags & BP_CPU &&
2126 !cc->debug_check_watchpoint(cpu, wp)) {
2127 wp->flags &= ~BP_WATCHPOINT_HIT;
2128 continue;
2130 cpu->watchpoint_hit = wp;
2132 /* The tb_lock will be reset when cpu_loop_exit or
2133 * cpu_loop_exit_noexc longjmp back into the cpu_exec
2134 * main loop.
2136 tb_lock();
2137 tb_check_watchpoint(cpu);
2138 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2139 cpu->exception_index = EXCP_DEBUG;
2140 cpu_loop_exit(cpu);
2141 } else {
2142 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2143 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2144 cpu_loop_exit_noexc(cpu);
2147 } else {
2148 wp->flags &= ~BP_WATCHPOINT_HIT;
2153 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2154 so these check for a hit then pass through to the normal out-of-line
2155 phys routines. */
2156 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2157 unsigned size, MemTxAttrs attrs)
2159 MemTxResult res;
2160 uint64_t data;
2161 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2162 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2164 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2165 switch (size) {
2166 case 1:
2167 data = address_space_ldub(as, addr, attrs, &res);
2168 break;
2169 case 2:
2170 data = address_space_lduw(as, addr, attrs, &res);
2171 break;
2172 case 4:
2173 data = address_space_ldl(as, addr, attrs, &res);
2174 break;
2175 default: abort();
2177 *pdata = data;
2178 return res;
2181 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2182 uint64_t val, unsigned size,
2183 MemTxAttrs attrs)
2185 MemTxResult res;
2186 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2187 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2189 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2190 switch (size) {
2191 case 1:
2192 address_space_stb(as, addr, val, attrs, &res);
2193 break;
2194 case 2:
2195 address_space_stw(as, addr, val, attrs, &res);
2196 break;
2197 case 4:
2198 address_space_stl(as, addr, val, attrs, &res);
2199 break;
2200 default: abort();
2202 return res;
2205 static const MemoryRegionOps watch_mem_ops = {
2206 .read_with_attrs = watch_mem_read,
2207 .write_with_attrs = watch_mem_write,
2208 .endianness = DEVICE_NATIVE_ENDIAN,
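/* Example (illustrative sketch): the routines above are only reached for
 * pages carrying a watchpoint, because inserting one forces those pages
 * through the slow path.  A debugger front end would arm a write
 * watchpoint roughly like this; the helper and its error handling are
 * simplified assumptions, not the gdbstub's actual code.
 */
static void example_arm_write_watchpoint(CPUState *cpu, vaddr addr, vaddr len)
{
    CPUWatchpoint *wp;

    if (cpu_watchpoint_insert(cpu, addr, len, BP_GDB | BP_MEM_WRITE, &wp)) {
        /* Insertion can fail, e.g. for an unsupported length. */
        return;
    }
    /* Later: cpu_watchpoint_remove_by_ref(cpu, wp); */
}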
2211 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2212 unsigned len, MemTxAttrs attrs)
2214 subpage_t *subpage = opaque;
2215 uint8_t buf[8];
2216 MemTxResult res;
2218 #if defined(DEBUG_SUBPAGE)
2219 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2220 subpage, len, addr);
2221 #endif
2222 res = address_space_read(subpage->as, addr + subpage->base,
2223 attrs, buf, len);
2224 if (res) {
2225 return res;
2227 switch (len) {
2228 case 1:
2229 *data = ldub_p(buf);
2230 return MEMTX_OK;
2231 case 2:
2232 *data = lduw_p(buf);
2233 return MEMTX_OK;
2234 case 4:
2235 *data = ldl_p(buf);
2236 return MEMTX_OK;
2237 case 8:
2238 *data = ldq_p(buf);
2239 return MEMTX_OK;
2240 default:
2241 abort();
2245 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2246 uint64_t value, unsigned len, MemTxAttrs attrs)
2248 subpage_t *subpage = opaque;
2249 uint8_t buf[8];
2251 #if defined(DEBUG_SUBPAGE)
2252 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2253 " value %"PRIx64"\n",
2254 __func__, subpage, len, addr, value);
2255 #endif
2256 switch (len) {
2257 case 1:
2258 stb_p(buf, value);
2259 break;
2260 case 2:
2261 stw_p(buf, value);
2262 break;
2263 case 4:
2264 stl_p(buf, value);
2265 break;
2266 case 8:
2267 stq_p(buf, value);
2268 break;
2269 default:
2270 abort();
2272 return address_space_write(subpage->as, addr + subpage->base,
2273 attrs, buf, len);
2276 static bool subpage_accepts(void *opaque, hwaddr addr,
2277 unsigned len, bool is_write)
2279 subpage_t *subpage = opaque;
2280 #if defined(DEBUG_SUBPAGE)
2281 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2282 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2283 #endif
2285 return address_space_access_valid(subpage->as, addr + subpage->base,
2286 len, is_write);
2289 static const MemoryRegionOps subpage_ops = {
2290 .read_with_attrs = subpage_read,
2291 .write_with_attrs = subpage_write,
2292 .impl.min_access_size = 1,
2293 .impl.max_access_size = 8,
2294 .valid.min_access_size = 1,
2295 .valid.max_access_size = 8,
2296 .valid.accepts = subpage_accepts,
2297 .endianness = DEVICE_NATIVE_ENDIAN,
2300 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2301 uint16_t section)
2303 int idx, eidx;
2305 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2306 return -1;
2307 idx = SUBPAGE_IDX(start);
2308 eidx = SUBPAGE_IDX(end);
2309 #if defined(DEBUG_SUBPAGE)
2310 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2311 __func__, mmio, start, end, idx, eidx, section);
2312 #endif
2313 for (; idx <= eidx; idx++) {
2314 mmio->sub_section[idx] = section;
2317 return 0;
2320 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2322 subpage_t *mmio;
2324 mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2325 mmio->as = as;
2326 mmio->base = base;
2327 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2328 NULL, TARGET_PAGE_SIZE);
2329 mmio->iomem.subpage = true;
2330 #if defined(DEBUG_SUBPAGE)
2331 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2332 mmio, base, TARGET_PAGE_SIZE);
2333 #endif
2334 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2336 return mmio;
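/* Example (illustrative sketch): how a caller such as register_subpage()
 * combines the two helpers above - create a subpage container for the page
 * holding 'base' and point a byte range inside it at a phys section index.
 * The wrapper itself is hypothetical.
 */
static void example_map_subpage_range(AddressSpace *as, hwaddr base,
                                      uint32_t start, uint32_t end,
                                      uint16_t section)
{
    subpage_t *subpage = subpage_init(as, base & TARGET_PAGE_MASK);

    subpage_register(subpage, start, end, section);
}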
2339 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2340 MemoryRegion *mr)
2342 assert(as);
2343 MemoryRegionSection section = {
2344 .address_space = as,
2345 .mr = mr,
2346 .offset_within_address_space = 0,
2347 .offset_within_region = 0,
2348 .size = int128_2_64(),
2351 return phys_section_add(map, &section);
2354 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2356 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2357 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2358 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2359 MemoryRegionSection *sections = d->map.sections;
2361 return sections[index & ~TARGET_PAGE_MASK].mr;
2364 static void io_mem_init(void)
2366 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2367 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2368 NULL, UINT64_MAX);
2369 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2370 NULL, UINT64_MAX);
2371 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2372 NULL, UINT64_MAX);
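/* Example (illustrative sketch): a device model builds its MMIO window the
 * same way the special regions above are built, except that it supplies
 * real handlers and attaches the region to the system memory tree.  The
 * device, its ops, the base address and the 0x1000 size are hypothetical.
 */
static uint64_t example_dev_read(void *opaque, hwaddr addr, unsigned size)
{
    return 0; /* registers read as zero in this sketch */
}

static void example_dev_write(void *opaque, hwaddr addr,
                              uint64_t val, unsigned size)
{
    /* latch the value into the (hypothetical) device state */
}

static const MemoryRegionOps example_dev_ops = {
    .read = example_dev_read,
    .write = example_dev_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static void example_dev_map_mmio(MemoryRegion *mmio, void *dev_state)
{
    memory_region_init_io(mmio, NULL, &example_dev_ops, dev_state,
                          "example-dev-mmio", 0x1000);
    memory_region_add_subregion(get_system_memory(), 0xfe000000, mmio);
}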
2375 static void mem_begin(MemoryListener *listener)
2377 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2378 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2379 uint16_t n;
2381 n = dummy_section(&d->map, as, &io_mem_unassigned);
2382 assert(n == PHYS_SECTION_UNASSIGNED);
2383 n = dummy_section(&d->map, as, &io_mem_notdirty);
2384 assert(n == PHYS_SECTION_NOTDIRTY);
2385 n = dummy_section(&d->map, as, &io_mem_rom);
2386 assert(n == PHYS_SECTION_ROM);
2387 n = dummy_section(&d->map, as, &io_mem_watch);
2388 assert(n == PHYS_SECTION_WATCH);
2390 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2391 d->as = as;
2392 as->next_dispatch = d;
2395 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2397 phys_sections_free(&d->map);
2398 g_free(d);
2401 static void mem_commit(MemoryListener *listener)
2403 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2404 AddressSpaceDispatch *cur = as->dispatch;
2405 AddressSpaceDispatch *next = as->next_dispatch;
2407 phys_page_compact_all(next, next->map.nodes_nb);
2409 atomic_rcu_set(&as->dispatch, next);
2410 if (cur) {
2411 call_rcu(cur, address_space_dispatch_free, rcu);
2415 static void tcg_commit(MemoryListener *listener)
2417 CPUAddressSpace *cpuas;
2418 AddressSpaceDispatch *d;
2420 /* since each CPU stores ram addresses in its TLB cache, we must
2421 reset the modified entries */
2422 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2423 cpu_reloading_memory_map();
2424 /* The CPU and TLB are protected by the iothread lock.
2425 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2426 * may have split the RCU critical section.
2428 d = atomic_rcu_read(&cpuas->as->dispatch);
2429 atomic_rcu_set(&cpuas->memory_dispatch, d);
2430 tlb_flush(cpuas->cpu, 1);
2433 void address_space_init_dispatch(AddressSpace *as)
2435 as->dispatch = NULL;
2436 as->dispatch_listener = (MemoryListener) {
2437 .begin = mem_begin,
2438 .commit = mem_commit,
2439 .region_add = mem_add,
2440 .region_nop = mem_add,
2441 .priority = 0,
2443 memory_listener_register(&as->dispatch_listener, as);
2446 void address_space_unregister(AddressSpace *as)
2448 memory_listener_unregister(&as->dispatch_listener);
2451 void address_space_destroy_dispatch(AddressSpace *as)
2453 AddressSpaceDispatch *d = as->dispatch;
2455 atomic_rcu_set(&as->dispatch, NULL);
2456 if (d) {
2457 call_rcu(d, address_space_dispatch_free, rcu);
2461 static void memory_map_init(void)
2463 system_memory = g_malloc(sizeof(*system_memory));
2465 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2466 address_space_init(&address_space_memory, system_memory, "memory");
2468 system_io = g_malloc(sizeof(*system_io));
2469 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2470 65536);
2471 address_space_init(&address_space_io, system_io, "I/O");
2474 MemoryRegion *get_system_memory(void)
2476 return system_memory;
2479 MemoryRegion *get_system_io(void)
2481 return system_io;
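/* Example (illustrative sketch): board code populates the "system" region
 * created above by nesting subregions into it.  A minimal RAM mapping at
 * guest address 0 might look like this; the region name is made up and
 * error handling is reduced to error_fatal.
 */
static void example_add_ram(MemoryRegion *ram, uint64_t size)
{
    memory_region_init_ram(ram, NULL, "example.ram", size, &error_fatal);
    memory_region_add_subregion(get_system_memory(), 0, ram);
}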
2484 #endif /* !defined(CONFIG_USER_ONLY) */
2486 /* physical memory access (slow version, mainly for debug) */
2487 #if defined(CONFIG_USER_ONLY)
2488 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2489 uint8_t *buf, int len, int is_write)
2491 int l, flags;
2492 target_ulong page;
2493 void * p;
2495 while (len > 0) {
2496 page = addr & TARGET_PAGE_MASK;
2497 l = (page + TARGET_PAGE_SIZE) - addr;
2498 if (l > len)
2499 l = len;
2500 flags = page_get_flags(page);
2501 if (!(flags & PAGE_VALID))
2502 return -1;
2503 if (is_write) {
2504 if (!(flags & PAGE_WRITE))
2505 return -1;
2506 /* XXX: this code should not depend on lock_user */
2507 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2508 return -1;
2509 memcpy(p, buf, l);
2510 unlock_user(p, addr, l);
2511 } else {
2512 if (!(flags & PAGE_READ))
2513 return -1;
2514 /* XXX: this code should not depend on lock_user */
2515 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2516 return -1;
2517 memcpy(buf, p, l);
2518 unlock_user(p, addr, 0);
2520 len -= l;
2521 buf += l;
2522 addr += l;
2524 return 0;
2527 #else
2529 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2530 hwaddr length)
2532 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2533 addr += memory_region_get_ram_addr(mr);
2535 /* No early return if dirty_log_mask is or becomes 0, because
2536 * cpu_physical_memory_set_dirty_range will still call
2537 * xen_modified_memory.
2539 if (dirty_log_mask) {
2540 dirty_log_mask =
2541 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2543 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2544 tb_lock();
2545 tb_invalidate_phys_range(addr, addr + length);
2546 tb_unlock();
2547 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2549 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2552 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2554 unsigned access_size_max = mr->ops->valid.max_access_size;
2556 /* Regions are assumed to support 1-4 byte accesses unless
2557 otherwise specified. */
2558 if (access_size_max == 0) {
2559 access_size_max = 4;
2562 /* Bound the maximum access by the alignment of the address. */
2563 if (!mr->ops->impl.unaligned) {
2564 unsigned align_size_max = addr & -addr;
2565 if (align_size_max != 0 && align_size_max < access_size_max) {
2566 access_size_max = align_size_max;
2570 /* Don't attempt accesses larger than the maximum. */
2571 if (l > access_size_max) {
2572 l = access_size_max;
2574 l = pow2floor(l);
2576 return l;
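/* Worked example for the clipping above (values are illustrative): for a
 * region whose ops leave valid.max_access_size at 0 (so 4 is assumed), an
 * 8-byte request at addr 0x1006 is first capped by the address alignment
 * (0x1006 & -0x1006 == 2), then rounded down to a power of two, so the
 * access is split and the first chunk is 2 bytes wide.
 */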
2579 static bool prepare_mmio_access(MemoryRegion *mr)
2581 bool unlocked = !qemu_mutex_iothread_locked();
2582 bool release_lock = false;
2584 if (unlocked && mr->global_locking) {
2585 qemu_mutex_lock_iothread();
2586 unlocked = false;
2587 release_lock = true;
2589 if (mr->flush_coalesced_mmio) {
2590 if (unlocked) {
2591 qemu_mutex_lock_iothread();
2593 qemu_flush_coalesced_mmio_buffer();
2594 if (unlocked) {
2595 qemu_mutex_unlock_iothread();
2599 return release_lock;
2602 /* Called within RCU critical section. */
2603 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2604 MemTxAttrs attrs,
2605 const uint8_t *buf,
2606 int len, hwaddr addr1,
2607 hwaddr l, MemoryRegion *mr)
2609 uint8_t *ptr;
2610 uint64_t val;
2611 MemTxResult result = MEMTX_OK;
2612 bool release_lock = false;
2614 for (;;) {
2615 if (!memory_access_is_direct(mr, true)) {
2616 release_lock |= prepare_mmio_access(mr);
2617 l = memory_access_size(mr, l, addr1);
2618 /* XXX: could force current_cpu to NULL to avoid
2619 potential bugs */
2620 switch (l) {
2621 case 8:
2622 /* 64 bit write access */
2623 val = ldq_p(buf);
2624 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2625 attrs);
2626 break;
2627 case 4:
2628 /* 32 bit write access */
2629 val = ldl_p(buf);
2630 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2631 attrs);
2632 break;
2633 case 2:
2634 /* 16 bit write access */
2635 val = lduw_p(buf);
2636 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2637 attrs);
2638 break;
2639 case 1:
2640 /* 8 bit write access */
2641 val = ldub_p(buf);
2642 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2643 attrs);
2644 break;
2645 default:
2646 abort();
2648 } else {
2649 /* RAM case */
2650 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2651 memcpy(ptr, buf, l);
2652 invalidate_and_set_dirty(mr, addr1, l);
2655 if (release_lock) {
2656 qemu_mutex_unlock_iothread();
2657 release_lock = false;
2660 len -= l;
2661 buf += l;
2662 addr += l;
2664 if (!len) {
2665 break;
2668 l = len;
2669 mr = address_space_translate(as, addr, &addr1, &l, true);
2672 return result;
2675 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2676 const uint8_t *buf, int len)
2678 hwaddr l;
2679 hwaddr addr1;
2680 MemoryRegion *mr;
2681 MemTxResult result = MEMTX_OK;
2683 if (len > 0) {
2684 rcu_read_lock();
2685 l = len;
2686 mr = address_space_translate(as, addr, &addr1, &l, true);
2687 result = address_space_write_continue(as, addr, attrs, buf, len,
2688 addr1, l, mr);
2689 rcu_read_unlock();
2692 return result;
2695 /* Called within RCU critical section. */
2696 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2697 MemTxAttrs attrs, uint8_t *buf,
2698 int len, hwaddr addr1, hwaddr l,
2699 MemoryRegion *mr)
2701 uint8_t *ptr;
2702 uint64_t val;
2703 MemTxResult result = MEMTX_OK;
2704 bool release_lock = false;
2706 for (;;) {
2707 if (!memory_access_is_direct(mr, false)) {
2708 /* I/O case */
2709 release_lock |= prepare_mmio_access(mr);
2710 l = memory_access_size(mr, l, addr1);
2711 switch (l) {
2712 case 8:
2713 /* 64 bit read access */
2714 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2715 attrs);
2716 stq_p(buf, val);
2717 break;
2718 case 4:
2719 /* 32 bit read access */
2720 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2721 attrs);
2722 stl_p(buf, val);
2723 break;
2724 case 2:
2725 /* 16 bit read access */
2726 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2727 attrs);
2728 stw_p(buf, val);
2729 break;
2730 case 1:
2731 /* 8 bit read access */
2732 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2733 attrs);
2734 stb_p(buf, val);
2735 break;
2736 default:
2737 abort();
2739 } else {
2740 /* RAM case */
2741 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2742 memcpy(buf, ptr, l);
2745 if (release_lock) {
2746 qemu_mutex_unlock_iothread();
2747 release_lock = false;
2750 len -= l;
2751 buf += l;
2752 addr += l;
2754 if (!len) {
2755 break;
2758 l = len;
2759 mr = address_space_translate(as, addr, &addr1, &l, false);
2762 return result;
2765 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2766 MemTxAttrs attrs, uint8_t *buf, int len)
2768 hwaddr l;
2769 hwaddr addr1;
2770 MemoryRegion *mr;
2771 MemTxResult result = MEMTX_OK;
2773 if (len > 0) {
2774 rcu_read_lock();
2775 l = len;
2776 mr = address_space_translate(as, addr, &addr1, &l, false);
2777 result = address_space_read_continue(as, addr, attrs, buf, len,
2778 addr1, l, mr);
2779 rcu_read_unlock();
2782 return result;
2785 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2786 uint8_t *buf, int len, bool is_write)
2788 if (is_write) {
2789 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2790 } else {
2791 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2795 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2796 int len, int is_write)
2798 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2799 buf, len, is_write);
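/* Example (illustrative sketch): device code that does not care about a
 * specific address space or transaction attributes typically funnels DMA
 * through the helper above; the wrapper and its arguments are made up.
 */
static void example_dma_copy_out(hwaddr guest_addr, const uint8_t *data,
                                 int len)
{
    /* Write 'len' bytes of 'data' into guest memory at 'guest_addr'. */
    cpu_physical_memory_rw(guest_addr, (uint8_t *)data, len, 1);
}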
2802 enum write_rom_type {
2803 WRITE_DATA,
2804 FLUSH_CACHE,
2807 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2808 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2810 hwaddr l;
2811 uint8_t *ptr;
2812 hwaddr addr1;
2813 MemoryRegion *mr;
2815 rcu_read_lock();
2816 while (len > 0) {
2817 l = len;
2818 mr = address_space_translate(as, addr, &addr1, &l, true);
2820 if (!(memory_region_is_ram(mr) ||
2821 memory_region_is_romd(mr))) {
2822 l = memory_access_size(mr, l, addr1);
2823 } else {
2824 /* ROM/RAM case */
2825 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2826 switch (type) {
2827 case WRITE_DATA:
2828 memcpy(ptr, buf, l);
2829 invalidate_and_set_dirty(mr, addr1, l);
2830 break;
2831 case FLUSH_CACHE:
2832 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2833 break;
2836 len -= l;
2837 buf += l;
2838 addr += l;
2840 rcu_read_unlock();
2843 /* used for ROM loading: can write in RAM and ROM */
2844 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2845 const uint8_t *buf, int len)
2847 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2850 void cpu_flush_icache_range(hwaddr start, int len)
2853 * This function should do the same thing as an icache flush that was
2854 * triggered from within the guest. For TCG we are always cache coherent,
2855 * so there is no need to flush anything. For KVM / Xen we need to flush
2856 * the host's instruction cache at least.
2858 if (tcg_enabled()) {
2859 return;
2862 cpu_physical_memory_write_rom_internal(&address_space_memory,
2863 start, NULL, len, FLUSH_CACHE);
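/* Example (illustrative sketch): a firmware loader writes its image with
 * the ROM-capable helper above and then makes sure the host icache is
 * coherent before the guest starts executing it.  The blob and its base
 * address are hypothetical.
 */
static void example_load_firmware(hwaddr rom_base, const uint8_t *blob,
                                  int blob_len)
{
    cpu_physical_memory_write_rom(&address_space_memory, rom_base,
                                  blob, blob_len);
    cpu_flush_icache_range(rom_base, blob_len);
}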
2866 typedef struct {
2867 MemoryRegion *mr;
2868 void *buffer;
2869 hwaddr addr;
2870 hwaddr len;
2871 bool in_use;
2872 } BounceBuffer;
2874 static BounceBuffer bounce;
2876 typedef struct MapClient {
2877 QEMUBH *bh;
2878 QLIST_ENTRY(MapClient) link;
2879 } MapClient;
2881 QemuMutex map_client_list_lock;
2882 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2883 = QLIST_HEAD_INITIALIZER(map_client_list);
2885 static void cpu_unregister_map_client_do(MapClient *client)
2887 QLIST_REMOVE(client, link);
2888 g_free(client);
2891 static void cpu_notify_map_clients_locked(void)
2893 MapClient *client;
2895 while (!QLIST_EMPTY(&map_client_list)) {
2896 client = QLIST_FIRST(&map_client_list);
2897 qemu_bh_schedule(client->bh);
2898 cpu_unregister_map_client_do(client);
2902 void cpu_register_map_client(QEMUBH *bh)
2904 MapClient *client = g_malloc(sizeof(*client));
2906 qemu_mutex_lock(&map_client_list_lock);
2907 client->bh = bh;
2908 QLIST_INSERT_HEAD(&map_client_list, client, link);
2909 if (!atomic_read(&bounce.in_use)) {
2910 cpu_notify_map_clients_locked();
2912 qemu_mutex_unlock(&map_client_list_lock);
2915 void cpu_exec_init_all(void)
2917 qemu_mutex_init(&ram_list.mutex);
2918 /* The data structures we set up here depend on knowing the page size,
2919 * so no more changes can be made after this point.
2920 * In an ideal world, nothing we did before we had finished the
2921 * machine setup would care about the target page size, and we could
2922 * do this much later, rather than requiring board models to state
2923 * up front what their requirements are.
2925 finalize_target_page_bits();
2926 io_mem_init();
2927 memory_map_init();
2928 qemu_mutex_init(&map_client_list_lock);
2931 void cpu_unregister_map_client(QEMUBH *bh)
2933 MapClient *client;
2935 qemu_mutex_lock(&map_client_list_lock);
2936 QLIST_FOREACH(client, &map_client_list, link) {
2937 if (client->bh == bh) {
2938 cpu_unregister_map_client_do(client);
2939 break;
2942 qemu_mutex_unlock(&map_client_list_lock);
2945 static void cpu_notify_map_clients(void)
2947 qemu_mutex_lock(&map_client_list_lock);
2948 cpu_notify_map_clients_locked();
2949 qemu_mutex_unlock(&map_client_list_lock);
2952 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2954 MemoryRegion *mr;
2955 hwaddr l, xlat;
2957 rcu_read_lock();
2958 while (len > 0) {
2959 l = len;
2960 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2961 if (!memory_access_is_direct(mr, is_write)) {
2962 l = memory_access_size(mr, l, addr);
2963 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2964 rcu_read_unlock();
2965 return false;
2968 len -= l;
2969 addr += l;
2971 rcu_read_unlock();
2972 return true;
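/* Example (illustrative sketch): callers that must not trigger a failing
 * transaction (e.g. when probing a guest-supplied address) can validate
 * the range first; the 512-byte length is arbitrary.
 */
static bool example_can_read_block(AddressSpace *as, hwaddr addr)
{
    return address_space_access_valid(as, addr, 512, false);
}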
2975 /* Map a physical memory region into a host virtual address.
2976 * May map a subset of the requested range, given by and returned in *plen.
2977 * May return NULL if resources needed to perform the mapping are exhausted.
2978 * Use only for reads OR writes - not for read-modify-write operations.
2979 * Use cpu_register_map_client() to know when retrying the map operation is
2980 * likely to succeed.
2982 void *address_space_map(AddressSpace *as,
2983 hwaddr addr,
2984 hwaddr *plen,
2985 bool is_write)
2987 hwaddr len = *plen;
2988 hwaddr done = 0;
2989 hwaddr l, xlat, base;
2990 MemoryRegion *mr, *this_mr;
2991 void *ptr;
2993 if (len == 0) {
2994 return NULL;
2997 l = len;
2998 rcu_read_lock();
2999 mr = address_space_translate(as, addr, &xlat, &l, is_write);
3001 if (!memory_access_is_direct(mr, is_write)) {
3002 if (atomic_xchg(&bounce.in_use, true)) {
3003 rcu_read_unlock();
3004 return NULL;
3006 /* Avoid unbounded allocations */
3007 l = MIN(l, TARGET_PAGE_SIZE);
3008 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
3009 bounce.addr = addr;
3010 bounce.len = l;
3012 memory_region_ref(mr);
3013 bounce.mr = mr;
3014 if (!is_write) {
3015 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
3016 bounce.buffer, l);
3019 rcu_read_unlock();
3020 *plen = l;
3021 return bounce.buffer;
3024 base = xlat;
3026 for (;;) {
3027 len -= l;
3028 addr += l;
3029 done += l;
3030 if (len == 0) {
3031 break;
3034 l = len;
3035 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3036 if (this_mr != mr || xlat != base + done) {
3037 break;
3041 memory_region_ref(mr);
3042 *plen = done;
3043 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
3044 rcu_read_unlock();
3046 return ptr;
3049 /* Unmaps a memory region previously mapped by address_space_map().
3050 * Will also mark the memory as dirty if is_write == 1. access_len gives
3051 * the amount of memory that was actually read or written by the caller.
3053 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3054 int is_write, hwaddr access_len)
3056 if (buffer != bounce.buffer) {
3057 MemoryRegion *mr;
3058 ram_addr_t addr1;
3060 mr = memory_region_from_host(buffer, &addr1);
3061 assert(mr != NULL);
3062 if (is_write) {
3063 invalidate_and_set_dirty(mr, addr1, access_len);
3065 if (xen_enabled()) {
3066 xen_invalidate_map_cache_entry(buffer);
3068 memory_region_unref(mr);
3069 return;
3071 if (is_write) {
3072 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3073 bounce.buffer, access_len);
3075 qemu_vfree(bounce.buffer);
3076 bounce.buffer = NULL;
3077 memory_region_unref(bounce.mr);
3078 atomic_mb_set(&bounce.in_use, false);
3079 cpu_notify_map_clients();
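/* Example (illustrative sketch): the canonical map/modify/unmap pattern
 * used for zero-copy DMA.  When address_space_map() returns NULL the
 * caller is expected to retry later, typically after registering a map
 * client (cpu_register_map_client) so it is woken once the bounce buffer
 * frees up.  The memset payload is just a placeholder.
 */
static bool example_dma_fill(AddressSpace *as, hwaddr addr, hwaddr size)
{
    hwaddr len = size;
    void *host = address_space_map(as, addr, &len, true);

    if (!host) {
        return false;           /* resources exhausted, retry later */
    }
    memset(host, 0, len);       /* 'len' may be smaller than 'size' */
    address_space_unmap(as, host, len, true, len);
    return len == size;
}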
3082 void *cpu_physical_memory_map(hwaddr addr,
3083 hwaddr *plen,
3084 int is_write)
3086 return address_space_map(&address_space_memory, addr, plen, is_write);
3089 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3090 int is_write, hwaddr access_len)
3092 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3095 /* warning: addr must be aligned */
3096 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3097 MemTxAttrs attrs,
3098 MemTxResult *result,
3099 enum device_endian endian)
3101 uint8_t *ptr;
3102 uint64_t val;
3103 MemoryRegion *mr;
3104 hwaddr l = 4;
3105 hwaddr addr1;
3106 MemTxResult r;
3107 bool release_lock = false;
3109 rcu_read_lock();
3110 mr = address_space_translate(as, addr, &addr1, &l, false);
3111 if (l < 4 || !memory_access_is_direct(mr, false)) {
3112 release_lock |= prepare_mmio_access(mr);
3114 /* I/O case */
3115 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3116 #if defined(TARGET_WORDS_BIGENDIAN)
3117 if (endian == DEVICE_LITTLE_ENDIAN) {
3118 val = bswap32(val);
3120 #else
3121 if (endian == DEVICE_BIG_ENDIAN) {
3122 val = bswap32(val);
3124 #endif
3125 } else {
3126 /* RAM case */
3127 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3128 switch (endian) {
3129 case DEVICE_LITTLE_ENDIAN:
3130 val = ldl_le_p(ptr);
3131 break;
3132 case DEVICE_BIG_ENDIAN:
3133 val = ldl_be_p(ptr);
3134 break;
3135 default:
3136 val = ldl_p(ptr);
3137 break;
3139 r = MEMTX_OK;
3141 if (result) {
3142 *result = r;
3144 if (release_lock) {
3145 qemu_mutex_unlock_iothread();
3147 rcu_read_unlock();
3148 return val;
3151 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3152 MemTxAttrs attrs, MemTxResult *result)
3154 return address_space_ldl_internal(as, addr, attrs, result,
3155 DEVICE_NATIVE_ENDIAN);
3158 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3159 MemTxAttrs attrs, MemTxResult *result)
3161 return address_space_ldl_internal(as, addr, attrs, result,
3162 DEVICE_LITTLE_ENDIAN);
3165 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3166 MemTxAttrs attrs, MemTxResult *result)
3168 return address_space_ldl_internal(as, addr, attrs, result,
3169 DEVICE_BIG_ENDIAN);
3172 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3174 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3177 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3179 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3182 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3184 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
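/* Example (illustrative sketch): the _phys wrappers above discard the
 * transaction result; a caller that needs to detect a failed read keeps
 * the attrs/result form instead.  The 0x10 register offset is made up.
 */
static uint32_t example_read_reg_checked(AddressSpace *as, hwaddr base)
{
    MemTxResult res;
    uint32_t val = address_space_ldl(as, base + 0x10,
                                     MEMTXATTRS_UNSPECIFIED, &res);

    return res == MEMTX_OK ? val : 0xffffffff;
}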
3187 /* warning: addr must be aligned */
3188 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3189 MemTxAttrs attrs,
3190 MemTxResult *result,
3191 enum device_endian endian)
3193 uint8_t *ptr;
3194 uint64_t val;
3195 MemoryRegion *mr;
3196 hwaddr l = 8;
3197 hwaddr addr1;
3198 MemTxResult r;
3199 bool release_lock = false;
3201 rcu_read_lock();
3202 mr = address_space_translate(as, addr, &addr1, &l,
3203 false);
3204 if (l < 8 || !memory_access_is_direct(mr, false)) {
3205 release_lock |= prepare_mmio_access(mr);
3207 /* I/O case */
3208 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3209 #if defined(TARGET_WORDS_BIGENDIAN)
3210 if (endian == DEVICE_LITTLE_ENDIAN) {
3211 val = bswap64(val);
3213 #else
3214 if (endian == DEVICE_BIG_ENDIAN) {
3215 val = bswap64(val);
3217 #endif
3218 } else {
3219 /* RAM case */
3220 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3221 switch (endian) {
3222 case DEVICE_LITTLE_ENDIAN:
3223 val = ldq_le_p(ptr);
3224 break;
3225 case DEVICE_BIG_ENDIAN:
3226 val = ldq_be_p(ptr);
3227 break;
3228 default:
3229 val = ldq_p(ptr);
3230 break;
3232 r = MEMTX_OK;
3234 if (result) {
3235 *result = r;
3237 if (release_lock) {
3238 qemu_mutex_unlock_iothread();
3240 rcu_read_unlock();
3241 return val;
3244 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3245 MemTxAttrs attrs, MemTxResult *result)
3247 return address_space_ldq_internal(as, addr, attrs, result,
3248 DEVICE_NATIVE_ENDIAN);
3251 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3252 MemTxAttrs attrs, MemTxResult *result)
3254 return address_space_ldq_internal(as, addr, attrs, result,
3255 DEVICE_LITTLE_ENDIAN);
3258 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3259 MemTxAttrs attrs, MemTxResult *result)
3261 return address_space_ldq_internal(as, addr, attrs, result,
3262 DEVICE_BIG_ENDIAN);
3265 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3267 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3270 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3272 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3275 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3277 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3280 /* XXX: optimize */
3281 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3282 MemTxAttrs attrs, MemTxResult *result)
3284 uint8_t val;
3285 MemTxResult r;
3287 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3288 if (result) {
3289 *result = r;
3291 return val;
3294 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3296 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3299 /* warning: addr must be aligned */
3300 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3301 hwaddr addr,
3302 MemTxAttrs attrs,
3303 MemTxResult *result,
3304 enum device_endian endian)
3306 uint8_t *ptr;
3307 uint64_t val;
3308 MemoryRegion *mr;
3309 hwaddr l = 2;
3310 hwaddr addr1;
3311 MemTxResult r;
3312 bool release_lock = false;
3314 rcu_read_lock();
3315 mr = address_space_translate(as, addr, &addr1, &l,
3316 false);
3317 if (l < 2 || !memory_access_is_direct(mr, false)) {
3318 release_lock |= prepare_mmio_access(mr);
3320 /* I/O case */
3321 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3322 #if defined(TARGET_WORDS_BIGENDIAN)
3323 if (endian == DEVICE_LITTLE_ENDIAN) {
3324 val = bswap16(val);
3326 #else
3327 if (endian == DEVICE_BIG_ENDIAN) {
3328 val = bswap16(val);
3330 #endif
3331 } else {
3332 /* RAM case */
3333 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3334 switch (endian) {
3335 case DEVICE_LITTLE_ENDIAN:
3336 val = lduw_le_p(ptr);
3337 break;
3338 case DEVICE_BIG_ENDIAN:
3339 val = lduw_be_p(ptr);
3340 break;
3341 default:
3342 val = lduw_p(ptr);
3343 break;
3345 r = MEMTX_OK;
3347 if (result) {
3348 *result = r;
3350 if (release_lock) {
3351 qemu_mutex_unlock_iothread();
3353 rcu_read_unlock();
3354 return val;
3357 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3358 MemTxAttrs attrs, MemTxResult *result)
3360 return address_space_lduw_internal(as, addr, attrs, result,
3361 DEVICE_NATIVE_ENDIAN);
3364 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3365 MemTxAttrs attrs, MemTxResult *result)
3367 return address_space_lduw_internal(as, addr, attrs, result,
3368 DEVICE_LITTLE_ENDIAN);
3371 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3372 MemTxAttrs attrs, MemTxResult *result)
3374 return address_space_lduw_internal(as, addr, attrs, result,
3375 DEVICE_BIG_ENDIAN);
3378 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3380 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3383 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3385 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3388 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3390 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3393 /* warning: addr must be aligned. The ram page is not flagged as
3394 containing modified code and the TBs inside it are not invalidated.
3395 This is useful if the dirty bits are used to track modified PTEs */
3396 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3397 MemTxAttrs attrs, MemTxResult *result)
3399 uint8_t *ptr;
3400 MemoryRegion *mr;
3401 hwaddr l = 4;
3402 hwaddr addr1;
3403 MemTxResult r;
3404 uint8_t dirty_log_mask;
3405 bool release_lock = false;
3407 rcu_read_lock();
3408 mr = address_space_translate(as, addr, &addr1, &l,
3409 true);
3410 if (l < 4 || !memory_access_is_direct(mr, true)) {
3411 release_lock |= prepare_mmio_access(mr);
3413 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3414 } else {
3415 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3416 stl_p(ptr, val);
3418 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3419 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3420 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3421 4, dirty_log_mask);
3422 r = MEMTX_OK;
3424 if (result) {
3425 *result = r;
3427 if (release_lock) {
3428 qemu_mutex_unlock_iothread();
3430 rcu_read_unlock();
3433 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3435 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
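/* Example (illustrative sketch): the typical user of the notdirty store
 * above is target MMU code updating accessed/dirty bits in a guest page
 * table entry; a plain stl_phys() would also invalidate any translated
 * code on that page.  The 0x20 accessed bit is a hypothetical layout.
 */
static void example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(as, pte_addr);

    stl_phys_notdirty(as, pte_addr, pte | 0x20 /* hypothetical accessed bit */);
}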
3438 /* warning: addr must be aligned */
3439 static inline void address_space_stl_internal(AddressSpace *as,
3440 hwaddr addr, uint32_t val,
3441 MemTxAttrs attrs,
3442 MemTxResult *result,
3443 enum device_endian endian)
3445 uint8_t *ptr;
3446 MemoryRegion *mr;
3447 hwaddr l = 4;
3448 hwaddr addr1;
3449 MemTxResult r;
3450 bool release_lock = false;
3452 rcu_read_lock();
3453 mr = address_space_translate(as, addr, &addr1, &l,
3454 true);
3455 if (l < 4 || !memory_access_is_direct(mr, true)) {
3456 release_lock |= prepare_mmio_access(mr);
3458 #if defined(TARGET_WORDS_BIGENDIAN)
3459 if (endian == DEVICE_LITTLE_ENDIAN) {
3460 val = bswap32(val);
3462 #else
3463 if (endian == DEVICE_BIG_ENDIAN) {
3464 val = bswap32(val);
3466 #endif
3467 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3468 } else {
3469 /* RAM case */
3470 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3471 switch (endian) {
3472 case DEVICE_LITTLE_ENDIAN:
3473 stl_le_p(ptr, val);
3474 break;
3475 case DEVICE_BIG_ENDIAN:
3476 stl_be_p(ptr, val);
3477 break;
3478 default:
3479 stl_p(ptr, val);
3480 break;
3482 invalidate_and_set_dirty(mr, addr1, 4);
3483 r = MEMTX_OK;
3485 if (result) {
3486 *result = r;
3488 if (release_lock) {
3489 qemu_mutex_unlock_iothread();
3491 rcu_read_unlock();
3494 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3495 MemTxAttrs attrs, MemTxResult *result)
3497 address_space_stl_internal(as, addr, val, attrs, result,
3498 DEVICE_NATIVE_ENDIAN);
3501 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3502 MemTxAttrs attrs, MemTxResult *result)
3504 address_space_stl_internal(as, addr, val, attrs, result,
3505 DEVICE_LITTLE_ENDIAN);
3508 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3509 MemTxAttrs attrs, MemTxResult *result)
3511 address_space_stl_internal(as, addr, val, attrs, result,
3512 DEVICE_BIG_ENDIAN);
3515 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3517 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3520 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3522 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3525 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3527 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3530 /* XXX: optimize */
3531 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3532 MemTxAttrs attrs, MemTxResult *result)
3534 uint8_t v = val;
3535 MemTxResult r;
3537 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3538 if (result) {
3539 *result = r;
3543 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3545 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3548 /* warning: addr must be aligned */
3549 static inline void address_space_stw_internal(AddressSpace *as,
3550 hwaddr addr, uint32_t val,
3551 MemTxAttrs attrs,
3552 MemTxResult *result,
3553 enum device_endian endian)
3555 uint8_t *ptr;
3556 MemoryRegion *mr;
3557 hwaddr l = 2;
3558 hwaddr addr1;
3559 MemTxResult r;
3560 bool release_lock = false;
3562 rcu_read_lock();
3563 mr = address_space_translate(as, addr, &addr1, &l, true);
3564 if (l < 2 || !memory_access_is_direct(mr, true)) {
3565 release_lock |= prepare_mmio_access(mr);
3567 #if defined(TARGET_WORDS_BIGENDIAN)
3568 if (endian == DEVICE_LITTLE_ENDIAN) {
3569 val = bswap16(val);
3571 #else
3572 if (endian == DEVICE_BIG_ENDIAN) {
3573 val = bswap16(val);
3575 #endif
3576 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3577 } else {
3578 /* RAM case */
3579 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3580 switch (endian) {
3581 case DEVICE_LITTLE_ENDIAN:
3582 stw_le_p(ptr, val);
3583 break;
3584 case DEVICE_BIG_ENDIAN:
3585 stw_be_p(ptr, val);
3586 break;
3587 default:
3588 stw_p(ptr, val);
3589 break;
3591 invalidate_and_set_dirty(mr, addr1, 2);
3592 r = MEMTX_OK;
3594 if (result) {
3595 *result = r;
3597 if (release_lock) {
3598 qemu_mutex_unlock_iothread();
3600 rcu_read_unlock();
3603 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3604 MemTxAttrs attrs, MemTxResult *result)
3606 address_space_stw_internal(as, addr, val, attrs, result,
3607 DEVICE_NATIVE_ENDIAN);
3610 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3611 MemTxAttrs attrs, MemTxResult *result)
3613 address_space_stw_internal(as, addr, val, attrs, result,
3614 DEVICE_LITTLE_ENDIAN);
3617 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3618 MemTxAttrs attrs, MemTxResult *result)
3620 address_space_stw_internal(as, addr, val, attrs, result,
3621 DEVICE_BIG_ENDIAN);
3624 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3626 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3629 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3631 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3634 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3636 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3639 /* XXX: optimize */
3640 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3641 MemTxAttrs attrs, MemTxResult *result)
3643 MemTxResult r;
3644 val = tswap64(val);
3645 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3646 if (result) {
3647 *result = r;
3651 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3652 MemTxAttrs attrs, MemTxResult *result)
3654 MemTxResult r;
3655 val = cpu_to_le64(val);
3656 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3657 if (result) {
3658 *result = r;
3661 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3662 MemTxAttrs attrs, MemTxResult *result)
3664 MemTxResult r;
3665 val = cpu_to_be64(val);
3666 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3667 if (result) {
3668 *result = r;
3672 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3674 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3677 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3679 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3682 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3684 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3687 /* virtual memory access for debug (includes writing to ROM) */
3688 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3689 uint8_t *buf, int len, int is_write)
3691 int l;
3692 hwaddr phys_addr;
3693 target_ulong page;
3695 while (len > 0) {
3696 int asidx;
3697 MemTxAttrs attrs;
3699 page = addr & TARGET_PAGE_MASK;
3700 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3701 asidx = cpu_asidx_from_attrs(cpu, attrs);
3702 /* if no physical page mapped, return an error */
3703 if (phys_addr == -1)
3704 return -1;
3705 l = (page + TARGET_PAGE_SIZE) - addr;
3706 if (l > len)
3707 l = len;
3708 phys_addr += (addr & ~TARGET_PAGE_MASK);
3709 if (is_write) {
3710 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3711 phys_addr, buf, l);
3712 } else {
3713 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3714 MEMTXATTRS_UNSPECIFIED,
3715 buf, l, 0);
3717 len -= l;
3718 buf += l;
3719 addr += l;
3721 return 0;
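/* Example (illustrative sketch): a debugger stub reads guest virtual
 * memory through the helper above; the fixed 64-byte window is only for
 * illustration.
 */
static int example_debug_peek(CPUState *cpu, target_ulong vaddr,
                              uint8_t *out)
{
    return cpu_memory_rw_debug(cpu, vaddr, out, 64, 0);
}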
3725 * Allows code that needs to deal with migration bitmaps etc. to still be
3726 * built target-independent.
3728 size_t qemu_target_page_bits(void)
3730 return TARGET_PAGE_BITS;
3733 #endif
3736 * A helper function for the _utterly broken_ virtio device model to find out if
3737 * it's running on a big endian machine. Don't do this at home kids!
3739 bool target_words_bigendian(void);
3740 bool target_words_bigendian(void)
3742 #if defined(TARGET_WORDS_BIGENDIAN)
3743 return true;
3744 #else
3745 return false;
3746 #endif
3749 #ifndef CONFIG_USER_ONLY
3750 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3752 MemoryRegion *mr;
3753 hwaddr l = 1;
3754 bool res;
3756 rcu_read_lock();
3757 mr = address_space_translate(&address_space_memory,
3758 phys_addr, &phys_addr, &l, false);
3760 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3761 rcu_read_unlock();
3762 return res;
3765 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3767 RAMBlock *block;
3768 int ret = 0;
3770 rcu_read_lock();
3771 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3772 ret = func(block->idstr, block->host, block->offset,
3773 block->used_length, opaque);
3774 if (ret) {
3775 break;
3778 rcu_read_unlock();
3779 return ret;
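/* Example (illustrative sketch): a migration-style walker visits every
 * RAMBlock through the iterator above.  The callback parameters mirror the
 * call made in qemu_ram_foreach_block(); this one just sums block sizes.
 *
 * Usage: uint64_t total = 0;
 *        qemu_ram_foreach_block(example_count_ram, &total);
 */
static int example_count_ram(const char *idstr, void *host_addr,
                             ram_addr_t offset, ram_addr_t length,
                             void *opaque)
{
    *(uint64_t *)opaque += length;
    return 0;               /* keep iterating */
}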
3781 #endif