Fix merge of HAXM support
[qemu/ar7.git] / exec.c
blob c4dc14376f74a51cccf0cfbeaba1ae4a06fb2854
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/hax.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include "qemu.h"
41 #else /* !CONFIG_USER_ONLY */
42 #include "hw/hw.h"
43 #include "exec/memory.h"
44 #include "exec/ioport.h"
45 #include "sysemu/dma.h"
46 #include "exec/address-spaces.h"
47 #include "sysemu/xen-mapcache.h"
48 #include "trace.h"
49 #endif
50 #include "exec/cpu-all.h"
51 #include "qemu/rcu_queue.h"
52 #include "qemu/main-loop.h"
53 #include "translate-all.h"
54 #include "sysemu/replay.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
58 #include "exec/log.h"
60 #include "migration/vmstate.h"
62 #include "qemu/range.h"
63 #ifndef _WIN32
64 #include "qemu/mmap-alloc.h"
65 #endif
67 //#define DEBUG_SUBPAGE
69 #if !defined(CONFIG_USER_ONLY)
70 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
71 * are protected by the ramlist lock.
73 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
75 static MemoryRegion *system_memory;
76 static MemoryRegion *system_io;
78 AddressSpace address_space_io;
79 AddressSpace address_space_memory;
81 MemoryRegion io_mem_rom, io_mem_notdirty;
82 static MemoryRegion io_mem_unassigned;
84 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
85 #define RAM_PREALLOC (1 << 0)
87 /* RAM is mmap-ed with MAP_SHARED */
88 #define RAM_SHARED (1 << 1)
90 /* Only a portion of RAM (used_length) is actually used, and migrated.
91 * This used_length size can change across reboots.
93 #define RAM_RESIZEABLE (1 << 2)
95 #endif
97 #ifdef TARGET_PAGE_BITS_VARY
98 int target_page_bits;
99 bool target_page_bits_decided;
100 #endif
102 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
103 /* current CPU in the current thread. It is only valid inside
104 cpu_exec() */
105 __thread CPUState *current_cpu;
106 /* 0 = Do not count executed instructions.
107 1 = Precise instruction counting.
108 2 = Adaptive rate instruction counting. */
109 int use_icount;
111 bool set_preferred_target_page_bits(int bits)
113 /* The target page size is the lowest common denominator for all
114 * the CPUs in the system, so we can only make it smaller, never
115 * larger. And we can't make it smaller once we've committed to
116 * a particular size.
118 #ifdef TARGET_PAGE_BITS_VARY
119 assert(bits >= TARGET_PAGE_BITS_MIN);
120 if (target_page_bits == 0 || target_page_bits > bits) {
121 if (target_page_bits_decided) {
122 return false;
124 target_page_bits = bits;
126 #endif
127 return true;
130 #if !defined(CONFIG_USER_ONLY)
132 static void finalize_target_page_bits(void)
134 #ifdef TARGET_PAGE_BITS_VARY
135 if (target_page_bits == 0) {
136 target_page_bits = TARGET_PAGE_BITS_MIN;
138 target_page_bits_decided = true;
139 #endif
142 typedef struct PhysPageEntry PhysPageEntry;
144 struct PhysPageEntry {
145     /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
146 uint32_t skip : 6;
147 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
148 uint32_t ptr : 26;
151 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
153 /* Size of the L2 (and L3, etc) page tables. */
154 #define ADDR_SPACE_BITS 64
156 #define P_L2_BITS 9
157 #define P_L2_SIZE (1 << P_L2_BITS)
159 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
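/* For example, with ADDR_SPACE_BITS = 64, TARGET_PAGE_BITS = 12 and
 * P_L2_BITS = 9, P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6: six 9-bit
 * levels are enough to cover the 52-bit page frame number.
 */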
161 typedef PhysPageEntry Node[P_L2_SIZE];
163 typedef struct PhysPageMap {
164 struct rcu_head rcu;
166 unsigned sections_nb;
167 unsigned sections_nb_alloc;
168 unsigned nodes_nb;
169 unsigned nodes_nb_alloc;
170 Node *nodes;
171 MemoryRegionSection *sections;
172 } PhysPageMap;
174 struct AddressSpaceDispatch {
175 struct rcu_head rcu;
177 MemoryRegionSection *mru_section;
178 /* This is a multi-level map on the physical address space.
179 * The bottom level has pointers to MemoryRegionSections.
181 PhysPageEntry phys_map;
182 PhysPageMap map;
183 AddressSpace *as;
186 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
187 typedef struct subpage_t {
188 MemoryRegion iomem;
189 AddressSpace *as;
190 hwaddr base;
191 uint16_t sub_section[];
192 } subpage_t;
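/* The flexible sub_section[] array effectively holds one section index
 * per byte offset within the page (indexed via SUBPAGE_IDX), so a single
 * guest page can be split across several MemoryRegionSections.
 */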
194 #define PHYS_SECTION_UNASSIGNED 0
195 #define PHYS_SECTION_NOTDIRTY 1
196 #define PHYS_SECTION_ROM 2
197 #define PHYS_SECTION_WATCH 3
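/* Well-known indices into the sections[] array for the special regions
 * above; the dispatch setup is expected to register them first, in this
 * order, so that these constants remain valid for every AddressSpaceDispatch.
 */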
199 static void io_mem_init(void);
200 static void memory_map_init(void);
201 static void tcg_commit(MemoryListener *listener);
203 static MemoryRegion io_mem_watch;
206 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
207 * @cpu: the CPU whose AddressSpace this is
208 * @as: the AddressSpace itself
209 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
210 * @tcg_as_listener: listener for tracking changes to the AddressSpace
212 struct CPUAddressSpace {
213 CPUState *cpu;
214 AddressSpace *as;
215 struct AddressSpaceDispatch *memory_dispatch;
216 MemoryListener tcg_as_listener;
219 #endif
221 #if !defined(CONFIG_USER_ONLY)
223 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
225 static unsigned alloc_hint = 16;
226 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
227 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
228 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
229 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
230 alloc_hint = map->nodes_nb_alloc;
234 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
236 unsigned i;
237 uint32_t ret;
238 PhysPageEntry e;
239 PhysPageEntry *p;
241 ret = map->nodes_nb++;
242 p = map->nodes[ret];
243 assert(ret != PHYS_MAP_NODE_NIL);
244 assert(ret != map->nodes_nb_alloc);
246 e.skip = leaf ? 0 : 1;
247 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
248 for (i = 0; i < P_L2_SIZE; ++i) {
249 memcpy(&p[i], &e, sizeof(e));
251 return ret;
254 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
255 hwaddr *index, hwaddr *nb, uint16_t leaf,
256 int level)
258 PhysPageEntry *p;
259 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
261 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
262 lp->ptr = phys_map_node_alloc(map, level == 0);
264 p = map->nodes[lp->ptr];
265 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
267 while (*nb && lp < &p[P_L2_SIZE]) {
268 if ((*index & (step - 1)) == 0 && *nb >= step) {
269 lp->skip = 0;
270 lp->ptr = leaf;
271 *index += step;
272 *nb -= step;
273 } else {
274 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
276 ++lp;
280 static void phys_page_set(AddressSpaceDispatch *d,
281 hwaddr index, hwaddr nb,
282 uint16_t leaf)
284 /* Wildly overreserve - it doesn't matter much. */
285 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
287 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
290 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
291 * and update our entry so we can skip it and go directly to the destination.
293 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
295 unsigned valid_ptr = P_L2_SIZE;
296 int valid = 0;
297 PhysPageEntry *p;
298 int i;
300 if (lp->ptr == PHYS_MAP_NODE_NIL) {
301 return;
304 p = nodes[lp->ptr];
305 for (i = 0; i < P_L2_SIZE; i++) {
306 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
307 continue;
310 valid_ptr = i;
311 valid++;
312 if (p[i].skip) {
313 phys_page_compact(&p[i], nodes);
317 /* We can only compress if there's only one child. */
318 if (valid != 1) {
319 return;
322 assert(valid_ptr < P_L2_SIZE);
324 /* Don't compress if it won't fit in the # of bits we have. */
325 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
326 return;
329 lp->ptr = p[valid_ptr].ptr;
330 if (!p[valid_ptr].skip) {
331 /* If our only child is a leaf, make this a leaf. */
332 /* By design, we should have made this node a leaf to begin with so we
333 * should never reach here.
334 * But since it's so simple to handle this, let's do it just in case we
335 * change this rule.
337 lp->skip = 0;
338 } else {
339 lp->skip += p[valid_ptr].skip;
343 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
345 if (d->phys_map.skip) {
346 phys_page_compact(&d->phys_map, d->map.nodes);
350 static inline bool section_covers_addr(const MemoryRegionSection *section,
351 hwaddr addr)
353 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
354 * the section must cover the entire address space.
356 return int128_gethi(section->size) ||
357 range_covers_byte(section->offset_within_address_space,
358 int128_getlo(section->size), addr);
361 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
362 Node *nodes, MemoryRegionSection *sections)
364 PhysPageEntry *p;
365 hwaddr index = addr >> TARGET_PAGE_BITS;
366 int i;
368 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
369 if (lp.ptr == PHYS_MAP_NODE_NIL) {
370 return &sections[PHYS_SECTION_UNASSIGNED];
372 p = nodes[lp.ptr];
373 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
376 if (section_covers_addr(&sections[lp.ptr], addr)) {
377 return &sections[lp.ptr];
378 } else {
379 return &sections[PHYS_SECTION_UNASSIGNED];
383 bool memory_region_is_unassigned(MemoryRegion *mr)
385 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
386 && mr != &io_mem_watch;
389 /* Called from RCU critical section */
390 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
391 hwaddr addr,
392 bool resolve_subpage)
394 MemoryRegionSection *section = atomic_read(&d->mru_section);
395 subpage_t *subpage;
396 bool update;
398 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
399 section_covers_addr(section, addr)) {
400 update = false;
401 } else {
402 section = phys_page_find(d->phys_map, addr, d->map.nodes,
403 d->map.sections);
404 update = true;
406 if (resolve_subpage && section->mr->subpage) {
407 subpage = container_of(section->mr, subpage_t, iomem);
408 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
410 if (update) {
411 atomic_set(&d->mru_section, section);
413 return section;
416 /* Called from RCU critical section */
417 static MemoryRegionSection *
418 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
419 hwaddr *plen, bool resolve_subpage)
421 MemoryRegionSection *section;
422 MemoryRegion *mr;
423 Int128 diff;
425 section = address_space_lookup_region(d, addr, resolve_subpage);
426 /* Compute offset within MemoryRegionSection */
427 addr -= section->offset_within_address_space;
429 /* Compute offset within MemoryRegion */
430 *xlat = addr + section->offset_within_region;
432 mr = section->mr;
434 /* MMIO registers can be expected to perform full-width accesses based only
435 * on their address, without considering adjacent registers that could
436 * decode to completely different MemoryRegions. When such registers
437 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
438 * regions overlap wildly. For this reason we cannot clamp the accesses
439 * here.
441 * If the length is small (as is the case for address_space_ldl/stl),
442 * everything works fine. If the incoming length is large, however,
443 * the caller really has to do the clamping through memory_access_size.
445 if (memory_region_is_ram(mr)) {
446 diff = int128_sub(section->size, int128_make64(addr));
447 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
449 return section;
452 /* Called from RCU critical section */
453 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
454 hwaddr *xlat, hwaddr *plen,
455 bool is_write)
457 IOMMUTLBEntry iotlb;
458 MemoryRegionSection *section;
459 MemoryRegion *mr;
461 for (;;) {
462 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
463 section = address_space_translate_internal(d, addr, &addr, plen, true);
464 mr = section->mr;
466 if (!mr->iommu_ops) {
467 break;
470 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
471 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
472 | (addr & iotlb.addr_mask));
473 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
474 if (!(iotlb.perm & (1 << is_write))) {
475 mr = &io_mem_unassigned;
476 break;
479 as = iotlb.target_as;
482 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
483 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
484 *plen = MIN(page, *plen);
487 *xlat = addr;
488 return mr;
491 /* Called from RCU critical section */
492 MemoryRegionSection *
493 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
494 hwaddr *xlat, hwaddr *plen)
496 MemoryRegionSection *section;
497 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
499 section = address_space_translate_internal(d, addr, xlat, plen, false);
501 assert(!section->mr->iommu_ops);
502 return section;
504 #endif
506 #if !defined(CONFIG_USER_ONLY)
508 static int cpu_common_post_load(void *opaque, int version_id)
510 CPUState *cpu = opaque;
512 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
513 version_id is increased. */
514 cpu->interrupt_request &= ~0x01;
515 tlb_flush(cpu, 1);
517 return 0;
520 static int cpu_common_pre_load(void *opaque)
522 CPUState *cpu = opaque;
524 cpu->exception_index = -1;
526 return 0;
529 static bool cpu_common_exception_index_needed(void *opaque)
531 CPUState *cpu = opaque;
533 return tcg_enabled() && cpu->exception_index != -1;
536 static const VMStateDescription vmstate_cpu_common_exception_index = {
537 .name = "cpu_common/exception_index",
538 .version_id = 1,
539 .minimum_version_id = 1,
540 .needed = cpu_common_exception_index_needed,
541 .fields = (VMStateField[]) {
542 VMSTATE_INT32(exception_index, CPUState),
543 VMSTATE_END_OF_LIST()
547 static bool cpu_common_crash_occurred_needed(void *opaque)
549 CPUState *cpu = opaque;
551 return cpu->crash_occurred;
554 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
555 .name = "cpu_common/crash_occurred",
556 .version_id = 1,
557 .minimum_version_id = 1,
558 .needed = cpu_common_crash_occurred_needed,
559 .fields = (VMStateField[]) {
560 VMSTATE_BOOL(crash_occurred, CPUState),
561 VMSTATE_END_OF_LIST()
565 const VMStateDescription vmstate_cpu_common = {
566 .name = "cpu_common",
567 .version_id = 1,
568 .minimum_version_id = 1,
569 .pre_load = cpu_common_pre_load,
570 .post_load = cpu_common_post_load,
571 .fields = (VMStateField[]) {
572 VMSTATE_UINT32(halted, CPUState),
573 VMSTATE_UINT32(interrupt_request, CPUState),
574 VMSTATE_END_OF_LIST()
576 .subsections = (const VMStateDescription*[]) {
577 &vmstate_cpu_common_exception_index,
578 &vmstate_cpu_common_crash_occurred,
579 NULL
583 #endif
585 CPUState *qemu_get_cpu(int index)
587 CPUState *cpu;
589 CPU_FOREACH(cpu) {
590 if (cpu->cpu_index == index) {
591 return cpu;
595 return NULL;
598 #if !defined(CONFIG_USER_ONLY)
599 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
601 CPUAddressSpace *newas;
603 /* Target code should have set num_ases before calling us */
604 assert(asidx < cpu->num_ases);
606 if (asidx == 0) {
607 /* address space 0 gets the convenience alias */
608 cpu->as = as;
611 /* KVM cannot currently support multiple address spaces. */
612 assert(asidx == 0 || !kvm_enabled());
614 if (!cpu->cpu_ases) {
615 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
618 newas = &cpu->cpu_ases[asidx];
619 newas->cpu = cpu;
620 newas->as = as;
621 if (tcg_enabled()) {
622 newas->tcg_as_listener.commit = tcg_commit;
623 memory_listener_register(&newas->tcg_as_listener, as);
627 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
629 /* Return the AddressSpace corresponding to the specified index */
630 return cpu->cpu_ases[asidx].as;
632 #endif
634 void cpu_exec_unrealizefn(CPUState *cpu)
636 CPUClass *cc = CPU_GET_CLASS(cpu);
638 cpu_list_remove(cpu);
640 if (cc->vmsd != NULL) {
641 vmstate_unregister(NULL, cc->vmsd, cpu);
643 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
644 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
648 void cpu_exec_initfn(CPUState *cpu)
650 #ifdef TARGET_WORDS_BIGENDIAN
651 cpu->bigendian = true;
652 #else
653 cpu->bigendian = false;
654 #endif
655 cpu->as = NULL;
656 cpu->num_ases = 0;
658 #ifndef CONFIG_USER_ONLY
659 cpu->thread_id = qemu_get_thread_id();
661 /* This is a softmmu CPU object, so create a property for it
662 * so users can wire up its memory. (This can't go in qom/cpu.c
663 * because that file is compiled only once for both user-mode
664 * and system builds.) The default if no link is set up is to use
665 * the system address space.
667 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
668 (Object **)&cpu->memory,
669 qdev_prop_allow_set_link_before_realize,
670 OBJ_PROP_LINK_UNREF_ON_RELEASE,
671 &error_abort);
672 cpu->memory = system_memory;
673 object_ref(OBJECT(cpu->memory));
674 #endif
677 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
679 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
681 cpu_list_add(cpu);
683 #ifndef CONFIG_USER_ONLY
684 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
685 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
687 if (cc->vmsd != NULL) {
688 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
690 #endif
693 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
695 /* Flush the whole TB as this will not have race conditions
696 * even if we don't have proper locking yet.
697 * Ideally we would just invalidate the TBs for the
698 * specified PC.
700 tb_flush(cpu);
703 #if defined(CONFIG_USER_ONLY)
704 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
709 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
710 int flags)
712 return -ENOSYS;
715 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
719 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
720 int flags, CPUWatchpoint **watchpoint)
722 return -ENOSYS;
724 #else
725 /* Add a watchpoint. */
726 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
727 int flags, CPUWatchpoint **watchpoint)
729 CPUWatchpoint *wp;
731 /* forbid ranges which are empty or run off the end of the address space */
732 if (len == 0 || (addr + len - 1) < addr) {
733 error_report("tried to set invalid watchpoint at %"
734 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
735 return -EINVAL;
737 wp = g_malloc(sizeof(*wp));
739 wp->vaddr = addr;
740 wp->len = len;
741 wp->flags = flags;
743 /* keep all GDB-injected watchpoints in front */
744 if (flags & BP_GDB) {
745 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
746 } else {
747 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
750 tlb_flush_page(cpu, addr);
752 if (watchpoint)
753 *watchpoint = wp;
754 return 0;
757 /* Remove a specific watchpoint. */
758 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
759 int flags)
761 CPUWatchpoint *wp;
763 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
764 if (addr == wp->vaddr && len == wp->len
765 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
766 cpu_watchpoint_remove_by_ref(cpu, wp);
767 return 0;
770 return -ENOENT;
773 /* Remove a specific watchpoint by reference. */
774 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
776 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
778 tlb_flush_page(cpu, watchpoint->vaddr);
780 g_free(watchpoint);
783 /* Remove all matching watchpoints. */
784 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
786 CPUWatchpoint *wp, *next;
788 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
789 if (wp->flags & mask) {
790 cpu_watchpoint_remove_by_ref(cpu, wp);
795 /* Return true if this watchpoint address matches the specified
796  * access (i.e. the address range covered by the watchpoint overlaps
797 * partially or completely with the address range covered by the
798 * access).
800 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
801 vaddr addr,
802 vaddr len)
804 /* We know the lengths are non-zero, but a little caution is
805 * required to avoid errors in the case where the range ends
806 * exactly at the top of the address space and so addr + len
807 * wraps round to zero.
809 vaddr wpend = wp->vaddr + wp->len - 1;
810 vaddr addrend = addr + len - 1;
812 return !(addr > wpend || wp->vaddr > addrend);
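    /* Example: a 4-byte watchpoint at 0x1000 (wpend = 0x1003) matches a
     * 2-byte access at 0x1002 (addrend = 0x1003) but not one at 0x1004.
     */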
815 #endif
817 /* Add a breakpoint. */
818 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
819 CPUBreakpoint **breakpoint)
821 CPUBreakpoint *bp;
823 bp = g_malloc(sizeof(*bp));
825 bp->pc = pc;
826 bp->flags = flags;
828 /* keep all GDB-injected breakpoints in front */
829 if (flags & BP_GDB) {
830 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
831 } else {
832 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
835 breakpoint_invalidate(cpu, pc);
837 if (breakpoint) {
838 *breakpoint = bp;
840 return 0;
843 /* Remove a specific breakpoint. */
844 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
846 CPUBreakpoint *bp;
848 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
849 if (bp->pc == pc && bp->flags == flags) {
850 cpu_breakpoint_remove_by_ref(cpu, bp);
851 return 0;
854 return -ENOENT;
857 /* Remove a specific breakpoint by reference. */
858 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
860 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
862 breakpoint_invalidate(cpu, breakpoint->pc);
864 g_free(breakpoint);
867 /* Remove all matching breakpoints. */
868 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
870 CPUBreakpoint *bp, *next;
872 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
873 if (bp->flags & mask) {
874 cpu_breakpoint_remove_by_ref(cpu, bp);
879 /* enable or disable single step mode. EXCP_DEBUG is returned by the
880 CPU loop after each instruction */
881 void cpu_single_step(CPUState *cpu, int enabled)
883 if (cpu->singlestep_enabled != enabled) {
884 cpu->singlestep_enabled = enabled;
885 if (kvm_enabled()) {
886 kvm_update_guest_debug(cpu, 0);
887 } else {
888 /* must flush all the translated code to avoid inconsistencies */
889 /* XXX: only flush what is necessary */
890 tb_flush(cpu);
895 void QEMU_NORETURN cpu_abort(CPUState *cpu, const char *fmt, ...)
897 va_list ap;
898 va_list ap2;
900 va_start(ap, fmt);
901 va_copy(ap2, ap);
902 fprintf(stderr, "qemu: fatal: ");
903 vfprintf(stderr, fmt, ap);
904 fprintf(stderr, "\n");
905 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
906 if (qemu_log_separate()) {
907 qemu_log_lock();
908 qemu_log("qemu: fatal: ");
909 qemu_log_vprintf(fmt, ap2);
910 qemu_log("\n");
911 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
912 qemu_log_flush();
913 qemu_log_unlock();
914 qemu_log_close();
916 va_end(ap2);
917 va_end(ap);
918 replay_finish();
919 #if defined(CONFIG_USER_ONLY)
921 struct sigaction act;
922 sigfillset(&act.sa_mask);
923 act.sa_handler = SIG_DFL;
924 sigaction(SIGABRT, &act, NULL);
926 #endif
927 abort();
930 #if !defined(CONFIG_USER_ONLY)
931 /* Called from RCU critical section */
932 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
934 RAMBlock *block;
936 block = atomic_rcu_read(&ram_list.mru_block);
937 if (block && addr - block->offset < block->max_length) {
938 return block;
940 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
941 if (addr - block->offset < block->max_length) {
942 goto found;
946 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
947 abort();
949 found:
950 /* It is safe to write mru_block outside the iothread lock. This
951 * is what happens:
953 * mru_block = xxx
954 * rcu_read_unlock()
955 * xxx removed from list
956 * rcu_read_lock()
957 * read mru_block
958 * mru_block = NULL;
959 * call_rcu(reclaim_ramblock, xxx);
960 * rcu_read_unlock()
962 * atomic_rcu_set is not needed here. The block was already published
963 * when it was placed into the list. Here we're just making an extra
964 * copy of the pointer.
966 ram_list.mru_block = block;
967 return block;
970 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
972 CPUState *cpu;
973 ram_addr_t start1;
974 RAMBlock *block;
975 ram_addr_t end;
977 end = TARGET_PAGE_ALIGN(start + length);
978 start &= TARGET_PAGE_MASK;
980 rcu_read_lock();
981 block = qemu_get_ram_block(start);
982 assert(block == qemu_get_ram_block(end - 1));
983 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
984 CPU_FOREACH(cpu) {
985 tlb_reset_dirty(cpu, start1, length);
987 rcu_read_unlock();
990 /* Note: start and end must be within the same ram block. */
991 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
992 ram_addr_t length,
993 unsigned client)
995 DirtyMemoryBlocks *blocks;
996 unsigned long end, page;
997 bool dirty = false;
999 if (length == 0) {
1000 return false;
1003 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1004 page = start >> TARGET_PAGE_BITS;
1006 rcu_read_lock();
1008 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1010 while (page < end) {
1011 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1012 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1013 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1015 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1016 offset, num);
1017 page += num;
1020 rcu_read_unlock();
1022 if (dirty && tcg_enabled()) {
1023 tlb_reset_dirty_range_all(start, length);
1026 return dirty;
1029 /* Called from RCU critical section */
1030 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1031 MemoryRegionSection *section,
1032 target_ulong vaddr,
1033 hwaddr paddr, hwaddr xlat,
1034 int prot,
1035 target_ulong *address)
1037 hwaddr iotlb;
1038 CPUWatchpoint *wp;
1040 if (memory_region_is_ram(section->mr)) {
1041 /* Normal RAM. */
1042 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1043 if (!section->readonly) {
1044 iotlb |= PHYS_SECTION_NOTDIRTY;
1045 } else {
1046 iotlb |= PHYS_SECTION_ROM;
1048 } else {
1049 AddressSpaceDispatch *d;
1051 d = atomic_rcu_read(&section->address_space->dispatch);
1052 iotlb = section - d->map.sections;
1053 iotlb += xlat;
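    /* Either way, the low TARGET_PAGE_BITS of iotlb hold a section index
     * (PHYS_SECTION_NOTDIRTY/ROM for RAM, the real section index for MMIO)
     * while the page-aligned upper bits carry the address to add back in;
     * see the assertion in phys_section_add() below.
     */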
1056 /* Make accesses to pages with watchpoints go via the
1057 watchpoint trap routines. */
1058 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1059 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1060 /* Avoid trapping reads of pages with a write breakpoint. */
1061 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1062 iotlb = PHYS_SECTION_WATCH + paddr;
1063 *address |= TLB_MMIO;
1064 break;
1069 return iotlb;
1071 #endif /* defined(CONFIG_USER_ONLY) */
1073 #if !defined(CONFIG_USER_ONLY)
1075 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1076 uint16_t section);
1077 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1079 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1080 qemu_anon_ram_alloc;
1083  * Set a custom physical guest memory allocator.
1084 * Accelerators with unusual needs may need this. Hopefully, we can
1085 * get rid of it eventually.
1087 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1089 phys_mem_alloc = alloc;
1092 static uint16_t phys_section_add(PhysPageMap *map,
1093 MemoryRegionSection *section)
1095 /* The physical section number is ORed with a page-aligned
1096 * pointer to produce the iotlb entries. Thus it should
1097 * never overflow into the page-aligned value.
1099 assert(map->sections_nb < TARGET_PAGE_SIZE);
1101 if (map->sections_nb == map->sections_nb_alloc) {
1102 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1103 map->sections = g_renew(MemoryRegionSection, map->sections,
1104 map->sections_nb_alloc);
1106 map->sections[map->sections_nb] = *section;
1107 memory_region_ref(section->mr);
1108 return map->sections_nb++;
1111 static void phys_section_destroy(MemoryRegion *mr)
1113 bool have_sub_page = mr->subpage;
1115 memory_region_unref(mr);
1117 if (have_sub_page) {
1118 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1119 object_unref(OBJECT(&subpage->iomem));
1120 g_free(subpage);
1124 static void phys_sections_free(PhysPageMap *map)
1126 while (map->sections_nb > 0) {
1127 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1128 phys_section_destroy(section->mr);
1130 g_free(map->sections);
1131 g_free(map->nodes);
1134 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1136 subpage_t *subpage;
1137 hwaddr base = section->offset_within_address_space
1138 & TARGET_PAGE_MASK;
1139 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1140 d->map.nodes, d->map.sections);
1141 MemoryRegionSection subsection = {
1142 .offset_within_address_space = base,
1143 .size = int128_make64(TARGET_PAGE_SIZE),
1145 hwaddr start, end;
1147 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1149 if (!(existing->mr->subpage)) {
1150 subpage = subpage_init(d->as, base);
1151 subsection.address_space = d->as;
1152 subsection.mr = &subpage->iomem;
1153 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1154 phys_section_add(&d->map, &subsection));
1155 } else {
1156 subpage = container_of(existing->mr, subpage_t, iomem);
1158 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1159 end = start + int128_get64(section->size) - 1;
1160 subpage_register(subpage, start, end,
1161 phys_section_add(&d->map, section));
1165 static void register_multipage(AddressSpaceDispatch *d,
1166 MemoryRegionSection *section)
1168 hwaddr start_addr = section->offset_within_address_space;
1169 uint16_t section_index = phys_section_add(&d->map, section);
1170 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1171 TARGET_PAGE_BITS));
1173 assert(num_pages);
1174 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
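/* mem_add() below splits an incoming section into an optional unaligned
 * head registered as a subpage, a run of whole target pages handled by
 * register_multipage(), and an optional unaligned tail that is again
 * registered as a subpage.
 */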
1177 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1179 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1180 AddressSpaceDispatch *d = as->next_dispatch;
1181 MemoryRegionSection now = *section, remain = *section;
1182 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1184 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1185 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1186 - now.offset_within_address_space;
1188 now.size = int128_min(int128_make64(left), now.size);
1189 register_subpage(d, &now);
1190 } else {
1191 now.size = int128_zero();
1193 while (int128_ne(remain.size, now.size)) {
1194 remain.size = int128_sub(remain.size, now.size);
1195 remain.offset_within_address_space += int128_get64(now.size);
1196 remain.offset_within_region += int128_get64(now.size);
1197 now = remain;
1198 if (int128_lt(remain.size, page_size)) {
1199 register_subpage(d, &now);
1200 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1201 now.size = page_size;
1202 register_subpage(d, &now);
1203 } else {
1204 now.size = int128_and(now.size, int128_neg(page_size));
1205 register_multipage(d, &now);
1210 void qemu_flush_coalesced_mmio_buffer(void)
1212 if (kvm_enabled())
1213 kvm_flush_coalesced_mmio_buffer();
1216 void qemu_mutex_lock_ramlist(void)
1218 qemu_mutex_lock(&ram_list.mutex);
1221 void qemu_mutex_unlock_ramlist(void)
1223 qemu_mutex_unlock(&ram_list.mutex);
1226 #ifdef __linux__
1227 static int64_t get_file_size(int fd)
1229 int64_t size = lseek(fd, 0, SEEK_END);
1230 if (size < 0) {
1231 return -errno;
1233 return size;
1236 static void *file_ram_alloc(RAMBlock *block,
1237 ram_addr_t memory,
1238 const char *path,
1239 Error **errp)
1241 bool unlink_on_error = false;
1242 char *filename;
1243 char *sanitized_name;
1244 char *c;
1245 void * volatile area = MAP_FAILED;
1246 int fd = -1;
1247 int64_t file_size;
1249 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1250 error_setg(errp,
1251 "host lacks kvm mmu notifiers, -mem-path unsupported");
1252 return NULL;
1255 for (;;) {
1256 fd = open(path, O_RDWR);
1257 if (fd >= 0) {
1258 /* @path names an existing file, use it */
1259 break;
1261 if (errno == ENOENT) {
1262 /* @path names a file that doesn't exist, create it */
1263 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1264 if (fd >= 0) {
1265 unlink_on_error = true;
1266 break;
1268 } else if (errno == EISDIR) {
1269 /* @path names a directory, create a file there */
1270 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1271 sanitized_name = g_strdup(memory_region_name(block->mr));
1272 for (c = sanitized_name; *c != '\0'; c++) {
1273 if (*c == '/') {
1274 *c = '_';
1278 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1279 sanitized_name);
1280 g_free(sanitized_name);
1282 fd = mkstemp(filename);
1283 if (fd >= 0) {
1284 unlink(filename);
1285 g_free(filename);
1286 break;
1288 g_free(filename);
1290 if (errno != EEXIST && errno != EINTR) {
1291 error_setg_errno(errp, errno,
1292 "can't open backing store %s for guest RAM",
1293 path);
1294 goto error;
1297 * Try again on EINTR and EEXIST. The latter happens when
1298 * something else creates the file between our two open().
1302 block->page_size = qemu_fd_getpagesize(fd);
1303 block->mr->align = block->page_size;
1304 #if defined(__s390x__)
1305 if (kvm_enabled()) {
1306 block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1308 #endif
1310 file_size = get_file_size(fd);
1312 if (memory < block->page_size) {
1313 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1314 "or larger than page size 0x%zx",
1315 memory, block->page_size);
1316 goto error;
1319 if (file_size > 0 && file_size < memory) {
1320 error_setg(errp, "backing store %s size 0x%" PRIx64
1321 " does not match 'size' option 0x" RAM_ADDR_FMT,
1322 path, file_size, memory);
1323 goto error;
1326 memory = ROUND_UP(memory, block->page_size);
1329 * ftruncate is not supported by hugetlbfs in older
1330 * hosts, so don't bother bailing out on errors.
1331 * If anything goes wrong with it under other filesystems,
1332 * mmap will fail.
1334 * Do not truncate the non-empty backend file to avoid corrupting
1335 * the existing data in the file. Disabling shrinking is not
1336 * enough. For example, the current vNVDIMM implementation stores
1337 * the guest NVDIMM labels at the end of the backend file. If the
1338 * backend file is later extended, QEMU will not be able to find
1339 * those labels. Therefore, extending the non-empty backend file
1340 * is disabled as well.
1342 if (!file_size && ftruncate(fd, memory)) {
1343 perror("ftruncate");
1346 area = qemu_ram_mmap(fd, memory, block->mr->align,
1347 block->flags & RAM_SHARED);
1348 if (area == MAP_FAILED) {
1349 error_setg_errno(errp, errno,
1350 "unable to map backing store for guest RAM");
1351 goto error;
1354 if (mem_prealloc) {
1355 os_mem_prealloc(fd, area, memory, errp);
1356 if (errp && *errp) {
1357 goto error;
1361 block->fd = fd;
1362 return area;
1364 error:
1365 if (area != MAP_FAILED) {
1366 qemu_ram_munmap(area, memory);
1368 if (unlink_on_error) {
1369 unlink(path);
1371 if (fd != -1) {
1372 close(fd);
1374 return NULL;
1376 #endif
1378 /* Called with the ramlist lock held. */
1379 static ram_addr_t find_ram_offset(ram_addr_t size)
1381 RAMBlock *block, *next_block;
1382 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1384     assert(size != 0); /* it would hand out the same offset multiple times */
1386 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1387 return 0;
1390 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1391 ram_addr_t end, next = RAM_ADDR_MAX;
1393 end = block->offset + block->max_length;
1395 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1396 if (next_block->offset >= end) {
1397 next = MIN(next, next_block->offset);
1400 if (next - end >= size && next - end < mingap) {
1401 offset = end;
1402 mingap = next - end;
1406 if (offset == RAM_ADDR_MAX) {
1407 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1408 (uint64_t)size);
1409 abort();
1412 return offset;
1415 ram_addr_t last_ram_offset(void)
1417 RAMBlock *block;
1418 ram_addr_t last = 0;
1420 rcu_read_lock();
1421 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1422 last = MAX(last, block->offset + block->max_length);
1424 rcu_read_unlock();
1425 return last;
1428 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1430 int ret;
1432 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1433 if (!machine_dump_guest_core(current_machine)) {
1434 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1435 if (ret) {
1436 perror("qemu_madvise");
1437 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1438 "but dump_guest_core=off specified\n");
1443 const char *qemu_ram_get_idstr(RAMBlock *rb)
1445 return rb->idstr;
1448 /* Called with iothread lock held. */
1449 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1451 RAMBlock *block;
1453 assert(new_block);
1454 assert(!new_block->idstr[0]);
1456 if (dev) {
1457 char *id = qdev_get_dev_path(dev);
1458 if (id) {
1459 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1460 g_free(id);
1463 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1465 rcu_read_lock();
1466 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1467 if (block != new_block &&
1468 !strcmp(block->idstr, new_block->idstr)) {
1469 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1470 new_block->idstr);
1471 abort();
1474 rcu_read_unlock();
1477 /* Called with iothread lock held. */
1478 void qemu_ram_unset_idstr(RAMBlock *block)
1480 /* FIXME: arch_init.c assumes that this is not called throughout
1481 * migration. Ignore the problem since hot-unplug during migration
1482 * does not work anyway.
1484 if (block) {
1485 memset(block->idstr, 0, sizeof(block->idstr));
1489 size_t qemu_ram_pagesize(RAMBlock *rb)
1491 return rb->page_size;
1494 static int memory_try_enable_merging(void *addr, size_t len)
1496 if (!machine_mem_merge(current_machine)) {
1497 /* disabled by the user */
1498 return 0;
1501 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1504 /* Only legal before the guest might have detected the memory size: e.g. on
1505  * incoming migration, or right after reset.
1507  * As the memory core doesn't know how the memory is accessed, it is up to
1508  * the resize callback to update device state and/or add assertions to detect
1509  * misuse, if necessary.
1511 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1513 assert(block);
1515 newsize = HOST_PAGE_ALIGN(newsize);
1517 if (block->used_length == newsize) {
1518 return 0;
1521 if (!(block->flags & RAM_RESIZEABLE)) {
1522 error_setg_errno(errp, EINVAL,
1523 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1524 " in != 0x" RAM_ADDR_FMT, block->idstr,
1525 newsize, block->used_length);
1526 return -EINVAL;
1529 if (block->max_length < newsize) {
1530 error_setg_errno(errp, EINVAL,
1531 "Length too large: %s: 0x" RAM_ADDR_FMT
1532 " > 0x" RAM_ADDR_FMT, block->idstr,
1533 newsize, block->max_length);
1534 return -EINVAL;
1537 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1538 block->used_length = newsize;
1539 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1540 DIRTY_CLIENTS_ALL);
1541 memory_region_set_size(block->mr, newsize);
1542 if (block->resized) {
1543 block->resized(block->idstr, newsize, block->host);
1545 return 0;
1548 /* Called with ram_list.mutex held */
1549 static void dirty_memory_extend(ram_addr_t old_ram_size,
1550 ram_addr_t new_ram_size)
1552 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1553 DIRTY_MEMORY_BLOCK_SIZE);
1554 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1555 DIRTY_MEMORY_BLOCK_SIZE);
1556 int i;
1558 /* Only need to extend if block count increased */
1559 if (new_num_blocks <= old_num_blocks) {
1560 return;
1563 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1564 DirtyMemoryBlocks *old_blocks;
1565 DirtyMemoryBlocks *new_blocks;
1566 int j;
1568 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1569 new_blocks = g_malloc(sizeof(*new_blocks) +
1570 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1572 if (old_num_blocks) {
1573 memcpy(new_blocks->blocks, old_blocks->blocks,
1574 old_num_blocks * sizeof(old_blocks->blocks[0]));
1577 for (j = old_num_blocks; j < new_num_blocks; j++) {
1578 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1581 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1583 if (old_blocks) {
1584 g_free_rcu(old_blocks, rcu);
1589 static void ram_block_add(RAMBlock *new_block, Error **errp)
1591 RAMBlock *block;
1592 RAMBlock *last_block = NULL;
1593 ram_addr_t old_ram_size, new_ram_size;
1594 Error *err = NULL;
1596 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1598 qemu_mutex_lock_ramlist();
1599 new_block->offset = find_ram_offset(new_block->max_length);
1601 if (!new_block->host) {
1602 if (xen_enabled()) {
1603 xen_ram_alloc(new_block->offset, new_block->max_length,
1604 new_block->mr, &err);
1605 if (err) {
1606 error_propagate(errp, err);
1607 qemu_mutex_unlock_ramlist();
1608 return;
1610 } else {
1611 new_block->host = phys_mem_alloc(new_block->max_length,
1612 &new_block->mr->align);
1613 if (!new_block->host) {
1614 error_setg_errno(errp, errno,
1615 "cannot set up guest memory '%s'",
1616 memory_region_name(new_block->mr));
1617 qemu_mutex_unlock_ramlist();
1618 return;
1621              * With HAX, QEMU allocates the virtual address range and the HAX
1622              * kernel module populates it with physical memory. There is currently
1623              * no paging, so the user should ensure enough free memory is available in advance.
1625 if (hax_enabled()) {
1626 int ret;
1627 ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host,
1628 new_block->max_length);
1629 if (ret < 0) {
1630 error_setg(errp, "Hax failed to populate ram");
1631 return;
1635 memory_try_enable_merging(new_block->host, new_block->max_length);
1639 new_ram_size = MAX(old_ram_size,
1640 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1641 if (new_ram_size > old_ram_size) {
1642 migration_bitmap_extend(old_ram_size, new_ram_size);
1643 dirty_memory_extend(old_ram_size, new_ram_size);
1645 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1646 * QLIST (which has an RCU-friendly variant) does not have insertion at
1647 * tail, so save the last element in last_block.
1649 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1650 last_block = block;
1651 if (block->max_length < new_block->max_length) {
1652 break;
1655 if (block) {
1656 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1657 } else if (last_block) {
1658 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1659 } else { /* list is empty */
1660 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1662 ram_list.mru_block = NULL;
1664 /* Write list before version */
1665 smp_wmb();
1666 ram_list.version++;
1667 qemu_mutex_unlock_ramlist();
1669 cpu_physical_memory_set_dirty_range(new_block->offset,
1670 new_block->used_length,
1671 DIRTY_CLIENTS_ALL);
1673 if (new_block->host) {
1674 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1675 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1676 /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1677 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1681 #ifdef __linux__
1682 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1683 bool share, const char *mem_path,
1684 Error **errp)
1686 RAMBlock *new_block;
1687 Error *local_err = NULL;
1689 if (xen_enabled()) {
1690 error_setg(errp, "-mem-path not supported with Xen");
1691 return NULL;
1694 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1696 * file_ram_alloc() needs to allocate just like
1697 * phys_mem_alloc, but we haven't bothered to provide
1698 * a hook there.
1700 error_setg(errp,
1701 "-mem-path not supported with this accelerator");
1702 return NULL;
1705 size = HOST_PAGE_ALIGN(size);
1706 new_block = g_malloc0(sizeof(*new_block));
1707 new_block->mr = mr;
1708 new_block->used_length = size;
1709 new_block->max_length = size;
1710 new_block->flags = share ? RAM_SHARED : 0;
1711 new_block->host = file_ram_alloc(new_block, size,
1712 mem_path, errp);
1713 if (!new_block->host) {
1714 g_free(new_block);
1715 return NULL;
1718 ram_block_add(new_block, &local_err);
1719 if (local_err) {
1720 g_free(new_block);
1721 error_propagate(errp, local_err);
1722 return NULL;
1724 return new_block;
1726 #endif
1728 static
1729 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1730 void (*resized)(const char*,
1731 uint64_t length,
1732 void *host),
1733 void *host, bool resizeable,
1734 MemoryRegion *mr, Error **errp)
1736 RAMBlock *new_block;
1737 Error *local_err = NULL;
1739 size = HOST_PAGE_ALIGN(size);
1740 max_size = HOST_PAGE_ALIGN(max_size);
1741 new_block = g_malloc0(sizeof(*new_block));
1742 new_block->mr = mr;
1743 new_block->resized = resized;
1744 new_block->used_length = size;
1745 new_block->max_length = max_size;
1746 assert(max_size >= size);
1747 new_block->fd = -1;
1748 new_block->page_size = getpagesize();
1749 new_block->host = host;
1750 if (host) {
1751 new_block->flags |= RAM_PREALLOC;
1753 if (resizeable) {
1754 new_block->flags |= RAM_RESIZEABLE;
1756 ram_block_add(new_block, &local_err);
1757 if (local_err) {
1758 g_free(new_block);
1759 error_propagate(errp, local_err);
1760 return NULL;
1762 return new_block;
1765 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1766 MemoryRegion *mr, Error **errp)
1768 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1771 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1773 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1776 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1777 void (*resized)(const char*,
1778 uint64_t length,
1779 void *host),
1780 MemoryRegion *mr, Error **errp)
1782 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1785 static void reclaim_ramblock(RAMBlock *block)
1787 if (block->flags & RAM_PREALLOC) {
1789 } else if (xen_enabled()) {
1790 xen_invalidate_map_cache_entry(block->host);
1791 #ifndef _WIN32
1792 } else if (block->fd >= 0) {
1793 qemu_ram_munmap(block->host, block->max_length);
1794 close(block->fd);
1795 #endif
1796 } else {
1797 qemu_anon_ram_free(block->host, block->max_length);
1799 g_free(block);
1802 void qemu_ram_free(RAMBlock *block)
1804 if (!block) {
1805 return;
1808 qemu_mutex_lock_ramlist();
1809 QLIST_REMOVE_RCU(block, next);
1810 ram_list.mru_block = NULL;
1811 /* Write list before version */
1812 smp_wmb();
1813 ram_list.version++;
1814 call_rcu(block, reclaim_ramblock, rcu);
1815 qemu_mutex_unlock_ramlist();
1818 #ifndef _WIN32
1819 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1821 RAMBlock *block;
1822 ram_addr_t offset;
1823 int flags;
1824 void *area, *vaddr;
1826 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1827 offset = addr - block->offset;
1828 if (offset < block->max_length) {
1829 vaddr = ramblock_ptr(block, offset);
1830 if (block->flags & RAM_PREALLOC) {
1832 } else if (xen_enabled()) {
1833 abort();
1834 } else {
1835 flags = MAP_FIXED;
1836 if (block->fd >= 0) {
1837 flags |= (block->flags & RAM_SHARED ?
1838 MAP_SHARED : MAP_PRIVATE);
1839 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1840 flags, block->fd, offset);
1841 } else {
1843 * Remap needs to match alloc. Accelerators that
1844 * set phys_mem_alloc never remap. If they did,
1845 * we'd need a remap hook here.
1847 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1849 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1850 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1851 flags, -1, 0);
1853 if (area != vaddr) {
1854 fprintf(stderr, "Could not remap addr: "
1855 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1856 length, addr);
1857 exit(1);
1859 memory_try_enable_merging(vaddr, length);
1860 qemu_ram_setup_dump(vaddr, length);
1865 #endif /* !_WIN32 */
1867 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1868 * This should not be used for general purpose DMA. Use address_space_map
1869 * or address_space_rw instead. For local memory (e.g. video ram) that the
1870 * device owns, use memory_region_get_ram_ptr.
1872 * Called within RCU critical section.
1874 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1876 RAMBlock *block = ram_block;
1878 if (block == NULL) {
1879 block = qemu_get_ram_block(addr);
1880 addr -= block->offset;
1883 if (xen_enabled() && block->host == NULL) {
1884 /* We need to check if the requested address is in the RAM
1885 * because we don't want to map the entire memory in QEMU.
1886 * In that case just map until the end of the page.
1888 if (block->offset == 0) {
1889 return xen_map_cache(addr, 0, 0);
1892 block->host = xen_map_cache(block->offset, block->max_length, 1);
1894 return ramblock_ptr(block, addr);
1897 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1898 * but takes a size argument.
1900 * Called within RCU critical section.
1902 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1903 hwaddr *size)
1905 RAMBlock *block = ram_block;
1906 if (*size == 0) {
1907 return NULL;
1910 if (block == NULL) {
1911 block = qemu_get_ram_block(addr);
1912 addr -= block->offset;
1914 *size = MIN(*size, block->max_length - addr);
1916 if (xen_enabled() && block->host == NULL) {
1917 /* We need to check if the requested address is in the RAM
1918 * because we don't want to map the entire memory in QEMU.
1919 * In that case just map the requested area.
1921 if (block->offset == 0) {
1922 return xen_map_cache(addr, *size, 1);
1925 block->host = xen_map_cache(block->offset, block->max_length, 1);
1928 return ramblock_ptr(block, addr);
1932 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1933 * in that RAMBlock.
1935 * ptr: Host pointer to look up
1936 * round_offset: If true round the result offset down to a page boundary
1937 * *ram_addr: set to result ram_addr
1938 * *offset: set to result offset within the RAMBlock
1940 * Returns: RAMBlock (or NULL if not found)
1942 * By the time this function returns, the returned pointer is not protected
1943 * by RCU anymore. If the caller is not within an RCU critical section and
1944 * does not hold the iothread lock, it must have other means of protecting the
1945 * pointer, such as a reference to the region that includes the incoming
1946 * ram_addr_t.
1948 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1949 ram_addr_t *offset)
1951 RAMBlock *block;
1952 uint8_t *host = ptr;
1954 if (xen_enabled()) {
1955 ram_addr_t ram_addr;
1956 rcu_read_lock();
1957 ram_addr = xen_ram_addr_from_mapcache(ptr);
1958 block = qemu_get_ram_block(ram_addr);
1959 if (block) {
1960 *offset = ram_addr - block->offset;
1962 rcu_read_unlock();
1963 return block;
1966 rcu_read_lock();
1967 block = atomic_rcu_read(&ram_list.mru_block);
1968 if (block && block->host && host - block->host < block->max_length) {
1969 goto found;
1972 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1973         /* This case appears when the block is not mapped. */
1974 if (block->host == NULL) {
1975 continue;
1977 if (host - block->host < block->max_length) {
1978 goto found;
1982 rcu_read_unlock();
1983 return NULL;
1985 found:
1986 *offset = (host - block->host);
1987 if (round_offset) {
1988 *offset &= TARGET_PAGE_MASK;
1990 rcu_read_unlock();
1991 return block;
1995 * Finds the named RAMBlock
1997 * name: The name of RAMBlock to find
1999 * Returns: RAMBlock (or NULL if not found)
2001 RAMBlock *qemu_ram_block_by_name(const char *name)
2003 RAMBlock *block;
2005 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2006 if (!strcmp(name, block->idstr)) {
2007 return block;
2011 return NULL;
2014 /* Some of the softmmu routines need to translate from a host pointer
2015 (typically a TLB entry) back to a ram offset. */
2016 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2018 RAMBlock *block;
2019 ram_addr_t offset;
2021 block = qemu_ram_block_from_host(ptr, false, &offset);
2022 if (!block) {
2023 return RAM_ADDR_INVALID;
2026 return block->offset + offset;
2029 /* Called within RCU critical section. */
2030 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2031 uint64_t val, unsigned size)
2033 bool locked = false;
2035 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2036 locked = true;
2037 tb_lock();
2038 tb_invalidate_phys_page_fast(ram_addr, size);
2040 switch (size) {
2041 case 1:
2042 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2043 break;
2044 case 2:
2045 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2046 break;
2047 case 4:
2048 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2049 break;
2050 default:
2051 abort();
2054 if (locked) {
2055 tb_unlock();
2058 /* Set both VGA and migration bits for simplicity and to remove
2059 * the notdirty callback faster.
2061 cpu_physical_memory_set_dirty_range(ram_addr, size,
2062 DIRTY_CLIENTS_NOCODE);
2063 /* we remove the notdirty callback only if the code has been
2064 flushed */
2065 if (!cpu_physical_memory_is_clean(ram_addr)) {
2066 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2070 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2071 unsigned size, bool is_write)
2073 return is_write;
2076 static const MemoryRegionOps notdirty_mem_ops = {
2077 .write = notdirty_mem_write,
2078 .valid.accepts = notdirty_mem_accepts,
2079 .endianness = DEVICE_NATIVE_ENDIAN,
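/* Reads of pages with clean code never come here: the TLB only flags the
 * write path, so reads go straight to RAM. Only writes are trapped, letting
 * the page be marked dirty and any TBs derived from it be invalidated
 * before the store lands.
 */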
2082 /* Generate a debug exception if a watchpoint has been hit. */
2083 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2085 CPUState *cpu = current_cpu;
2086 CPUClass *cc = CPU_GET_CLASS(cpu);
2087 CPUArchState *env = cpu->env_ptr;
2088 target_ulong pc, cs_base;
2089 target_ulong vaddr;
2090 CPUWatchpoint *wp;
2091 uint32_t cpu_flags;
2093 if (cpu->watchpoint_hit) {
2094 /* We re-entered the check after replacing the TB. Now raise
2095          * the debug interrupt so that it will trigger after the
2096 * current instruction. */
2097 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2098 return;
2100 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2101 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2102 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2103 && (wp->flags & flags)) {
2104 if (flags == BP_MEM_READ) {
2105 wp->flags |= BP_WATCHPOINT_HIT_READ;
2106 } else {
2107 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2109 wp->hitaddr = vaddr;
2110 wp->hitattrs = attrs;
2111 if (!cpu->watchpoint_hit) {
2112 if (wp->flags & BP_CPU &&
2113 !cc->debug_check_watchpoint(cpu, wp)) {
2114 wp->flags &= ~BP_WATCHPOINT_HIT;
2115 continue;
2117 cpu->watchpoint_hit = wp;
2119 /* The tb_lock will be reset when cpu_loop_exit or
2120 * cpu_loop_exit_noexc longjmp back into the cpu_exec
2121 * main loop.
2123 tb_lock();
2124 tb_check_watchpoint(cpu);
2125 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2126 cpu->exception_index = EXCP_DEBUG;
2127 cpu_loop_exit(cpu);
2128 } else {
2129 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2130 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2131 cpu_loop_exit_noexc(cpu);
2134 } else {
2135 wp->flags &= ~BP_WATCHPOINT_HIT;
2140 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2141 so these check for a hit then pass through to the normal out-of-line
2142 phys routines. */
2143 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2144 unsigned size, MemTxAttrs attrs)
2146 MemTxResult res;
2147 uint64_t data;
2148 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2149 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2151 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2152 switch (size) {
2153 case 1:
2154 data = address_space_ldub(as, addr, attrs, &res);
2155 break;
2156 case 2:
2157 data = address_space_lduw(as, addr, attrs, &res);
2158 break;
2159 case 4:
2160 data = address_space_ldl(as, addr, attrs, &res);
2161 break;
2162 default: abort();
2164 *pdata = data;
2165 return res;
2168 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2169 uint64_t val, unsigned size,
2170 MemTxAttrs attrs)
2172 MemTxResult res;
2173 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2174 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2176 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2177 switch (size) {
2178 case 1:
2179 address_space_stb(as, addr, val, attrs, &res);
2180 break;
2181 case 2:
2182 address_space_stw(as, addr, val, attrs, &res);
2183 break;
2184 case 4:
2185 address_space_stl(as, addr, val, attrs, &res);
2186 break;
2187 default: abort();
2189 return res;
2192 static const MemoryRegionOps watch_mem_ops = {
2193 .read_with_attrs = watch_mem_read,
2194 .write_with_attrs = watch_mem_write,
2195 .endianness = DEVICE_NATIVE_ENDIAN,
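/* Pages containing at least one watchpoint are routed to io_mem_watch via
 * the TLB, so every load/store on them funnels through the handlers above
 * before being forwarded to the real address space.  A minimal sketch of
 * arming one (assuming the cpu_watchpoint_insert() prototype declared in
 * include/qom/cpu.h; "vaddr_of_interest" is a placeholder):
 *
 *     CPUWatchpoint *wp;
 *     cpu_watchpoint_insert(cpu, vaddr_of_interest, 4,
 *                           BP_MEM_WRITE | BP_GDB, &wp);
 */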
2198 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2199 unsigned len, MemTxAttrs attrs)
2201 subpage_t *subpage = opaque;
2202 uint8_t buf[8];
2203 MemTxResult res;
2205 #if defined(DEBUG_SUBPAGE)
2206 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2207 subpage, len, addr);
2208 #endif
2209 res = address_space_read(subpage->as, addr + subpage->base,
2210 attrs, buf, len);
2211 if (res) {
2212 return res;
2214 switch (len) {
2215 case 1:
2216 *data = ldub_p(buf);
2217 return MEMTX_OK;
2218 case 2:
2219 *data = lduw_p(buf);
2220 return MEMTX_OK;
2221 case 4:
2222 *data = ldl_p(buf);
2223 return MEMTX_OK;
2224 case 8:
2225 *data = ldq_p(buf);
2226 return MEMTX_OK;
2227 default:
2228 abort();
2232 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2233 uint64_t value, unsigned len, MemTxAttrs attrs)
2235 subpage_t *subpage = opaque;
2236 uint8_t buf[8];
2238 #if defined(DEBUG_SUBPAGE)
2239 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2240 " value %"PRIx64"\n",
2241 __func__, subpage, len, addr, value);
2242 #endif
2243 switch (len) {
2244 case 1:
2245 stb_p(buf, value);
2246 break;
2247 case 2:
2248 stw_p(buf, value);
2249 break;
2250 case 4:
2251 stl_p(buf, value);
2252 break;
2253 case 8:
2254 stq_p(buf, value);
2255 break;
2256 default:
2257 abort();
2259 return address_space_write(subpage->as, addr + subpage->base,
2260 attrs, buf, len);
2263 static bool subpage_accepts(void *opaque, hwaddr addr,
2264 unsigned len, bool is_write)
2266 subpage_t *subpage = opaque;
2267 #if defined(DEBUG_SUBPAGE)
2268 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2269 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2270 #endif
2272 return address_space_access_valid(subpage->as, addr + subpage->base,
2273 len, is_write);
2276 static const MemoryRegionOps subpage_ops = {
2277 .read_with_attrs = subpage_read,
2278 .write_with_attrs = subpage_write,
2279 .impl.min_access_size = 1,
2280 .impl.max_access_size = 8,
2281 .valid.min_access_size = 1,
2282 .valid.max_access_size = 8,
2283 .valid.accepts = subpage_accepts,
2284 .endianness = DEVICE_NATIVE_ENDIAN,
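/* A subpage_t covers a single guest page that is split across several
 * memory regions (for example a small MMIO window carved out of a RAM
 * page).  sub_section[] holds one section number per byte offset within
 * the page (see the allocation in subpage_init() below), and the handlers
 * above simply bounce the access back into the owning address space at
 * subpage->base + addr, where the dispatch resolves the correct sub-region.
 */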
2287 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2288 uint16_t section)
2290 int idx, eidx;
2292 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2293 return -1;
2294 idx = SUBPAGE_IDX(start);
2295 eidx = SUBPAGE_IDX(end);
2296 #if defined(DEBUG_SUBPAGE)
2297 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2298 __func__, mmio, start, end, idx, eidx, section);
2299 #endif
2300 for (; idx <= eidx; idx++) {
2301 mmio->sub_section[idx] = section;
2304 return 0;
2307 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2309 subpage_t *mmio;
2311 mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2312 mmio->as = as;
2313 mmio->base = base;
2314 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2315 NULL, TARGET_PAGE_SIZE);
2316 mmio->iomem.subpage = true;
2317 #if defined(DEBUG_SUBPAGE)
2318 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2319 mmio, base, TARGET_PAGE_SIZE);
2320 #endif
2321 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2323 return mmio;
2326 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2327 MemoryRegion *mr)
2329 assert(as);
2330 MemoryRegionSection section = {
2331 .address_space = as,
2332 .mr = mr,
2333 .offset_within_address_space = 0,
2334 .offset_within_region = 0,
2335 .size = int128_2_64(),
2338 return phys_section_add(map, &section);
2341 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2343 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2344 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2345 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2346 MemoryRegionSection *sections = d->map.sections;
2348 return sections[index & ~TARGET_PAGE_MASK].mr;
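/* For I/O pages the softmmu TLB stores a section index rather than a
 * MemoryRegion pointer in the low bits of the iotlb entry;
 * iotlb_to_region() converts that index back into a MemoryRegion using the
 * per-CPU dispatch snapshot that tcg_commit() below keeps up to date.
 */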
2351 static void io_mem_init(void)
2353 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2354 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2355 NULL, UINT64_MAX);
2356 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2357 NULL, UINT64_MAX);
2358 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2359 NULL, UINT64_MAX);
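/* The four special regions initialized here receive fixed section numbers
 * from mem_begin() below (PHYS_SECTION_UNASSIGNED, _NOTDIRTY, _ROM and
 * _WATCH), which lets code elsewhere, e.g. the TLB fill path, refer to
 * them by constant index.
 */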
2362 static void mem_begin(MemoryListener *listener)
2364 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2365 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2366 uint16_t n;
2368 n = dummy_section(&d->map, as, &io_mem_unassigned);
2369 assert(n == PHYS_SECTION_UNASSIGNED);
2370 n = dummy_section(&d->map, as, &io_mem_notdirty);
2371 assert(n == PHYS_SECTION_NOTDIRTY);
2372 n = dummy_section(&d->map, as, &io_mem_rom);
2373 assert(n == PHYS_SECTION_ROM);
2374 n = dummy_section(&d->map, as, &io_mem_watch);
2375 assert(n == PHYS_SECTION_WATCH);
2377 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2378 d->as = as;
2379 as->next_dispatch = d;
2382 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2384 phys_sections_free(&d->map);
2385 g_free(d);
2388 static void mem_commit(MemoryListener *listener)
2390 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2391 AddressSpaceDispatch *cur = as->dispatch;
2392 AddressSpaceDispatch *next = as->next_dispatch;
2394 phys_page_compact_all(next, next->map.nodes_nb);
2396 atomic_rcu_set(&as->dispatch, next);
2397 if (cur) {
2398 call_rcu(cur, address_space_dispatch_free, rcu);
2402 static void tcg_commit(MemoryListener *listener)
2404 CPUAddressSpace *cpuas;
2405 AddressSpaceDispatch *d;
2407 /* since each CPU stores ram addresses in its TLB cache, we must
2408 reset the modified entries */
2409 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2410 cpu_reloading_memory_map();
2411 /* The CPU and TLB are protected by the iothread lock.
2412 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2413 * may have split the RCU critical section.
2415 d = atomic_rcu_read(&cpuas->as->dispatch);
2416 atomic_rcu_set(&cpuas->memory_dispatch, d);
2417 tlb_flush(cpuas->cpu, 1);
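/* mem_commit() publishes the freshly built AddressSpaceDispatch with
 * atomic_rcu_set() and defers freeing the old one to call_rcu(), so readers
 * inside an RCU critical section never see a half-built radix tree.
 * tcg_commit() then refreshes the per-CPU memory_dispatch pointer and
 * flushes the TLB, because cached iotlb entries may still reference
 * sections from the old dispatch.
 */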
2420 void address_space_init_dispatch(AddressSpace *as)
2422 as->dispatch = NULL;
2423 as->dispatch_listener = (MemoryListener) {
2424 .begin = mem_begin,
2425 .commit = mem_commit,
2426 .region_add = mem_add,
2427 .region_nop = mem_add,
2428 .priority = 0,
2430 memory_listener_register(&as->dispatch_listener, as);
2433 void address_space_unregister(AddressSpace *as)
2435 memory_listener_unregister(&as->dispatch_listener);
2438 void address_space_destroy_dispatch(AddressSpace *as)
2440 AddressSpaceDispatch *d = as->dispatch;
2442 atomic_rcu_set(&as->dispatch, NULL);
2443 if (d) {
2444 call_rcu(d, address_space_dispatch_free, rcu);
2448 static void memory_map_init(void)
2450 system_memory = g_malloc(sizeof(*system_memory));
2452 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2453 address_space_init(&address_space_memory, system_memory, "memory");
2455 system_io = g_malloc(sizeof(*system_io));
2456 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2457 65536);
2458 address_space_init(&address_space_io, system_io, "I/O");
2461 MemoryRegion *get_system_memory(void)
2463 return system_memory;
2466 MemoryRegion *get_system_io(void)
2468 return system_io;
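/* Boards and devices hang their memory off these two roots.  A minimal,
 * illustrative sketch ("my-ram" and ram_size are placeholders; the calls
 * are the generic memory API, not something defined in this file):
 *
 *     MemoryRegion *ram = g_new(MemoryRegion, 1);
 *     memory_region_init_ram(ram, NULL, "my-ram", ram_size, &error_fatal);
 *     memory_region_add_subregion(get_system_memory(), 0, ram);
 */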
2471 #endif /* !defined(CONFIG_USER_ONLY) */
2473 /* physical memory access (slow version, mainly for debug) */
2474 #if defined(CONFIG_USER_ONLY)
2475 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2476 uint8_t *buf, int len, int is_write)
2478 int l, flags;
2479 target_ulong page;
2480 void *p;
2482 while (len > 0) {
2483 page = addr & TARGET_PAGE_MASK;
2484 l = (page + TARGET_PAGE_SIZE) - addr;
2485 if (l > len)
2486 l = len;
2487 flags = page_get_flags(page);
2488 if (!(flags & PAGE_VALID))
2489 return -1;
2490 if (is_write) {
2491 if (!(flags & PAGE_WRITE))
2492 return -1;
2493 /* XXX: this code should not depend on lock_user */
2494 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2495 return -1;
2496 memcpy(p, buf, l);
2497 unlock_user(p, addr, l);
2498 } else {
2499 if (!(flags & PAGE_READ))
2500 return -1;
2501 /* XXX: this code should not depend on lock_user */
2502 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2503 return -1;
2504 memcpy(buf, p, l);
2505 unlock_user(p, addr, 0);
2507 len -= l;
2508 buf += l;
2509 addr += l;
2511 return 0;
2514 #else
2516 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2517 hwaddr length)
2519 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2520 addr += memory_region_get_ram_addr(mr);
2522 /* No early return if dirty_log_mask is or becomes 0, because
2523 * cpu_physical_memory_set_dirty_range will still call
2524 * xen_modified_memory.
2526 if (dirty_log_mask) {
2527 dirty_log_mask =
2528 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2530 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2531 tb_lock();
2532 tb_invalidate_phys_range(addr, addr + length);
2533 tb_unlock();
2534 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2536 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2539 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2541 unsigned access_size_max = mr->ops->valid.max_access_size;
2543 /* Regions are assumed to support 1-4 byte accesses unless
2544 otherwise specified. */
2545 if (access_size_max == 0) {
2546 access_size_max = 4;
2549 /* Bound the maximum access by the alignment of the address. */
2550 if (!mr->ops->impl.unaligned) {
2551 unsigned align_size_max = addr & -addr;
2552 if (align_size_max != 0 && align_size_max < access_size_max) {
2553 access_size_max = align_size_max;
2557 /* Don't attempt accesses larger than the maximum. */
2558 if (l > access_size_max) {
2559 l = access_size_max;
2561 l = pow2floor(l);
2563 return l;
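/* memory_access_size() shrinks an l-byte access so that it (a) does not
 * exceed the region's valid.max_access_size, (b) stays naturally aligned
 * when the region does not accept unaligned accesses (addr & -addr isolates
 * the lowest set address bit, e.g. an address ending in 0x6 limits l to 2),
 * and (c) is a power of two, so the dispatch loops below can consume the
 * remainder in further iterations.
 */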
2566 static bool prepare_mmio_access(MemoryRegion *mr)
2568 bool unlocked = !qemu_mutex_iothread_locked();
2569 bool release_lock = false;
2571 if (unlocked && mr->global_locking) {
2572 qemu_mutex_lock_iothread();
2573 unlocked = false;
2574 release_lock = true;
2576 if (mr->flush_coalesced_mmio) {
2577 if (unlocked) {
2578 qemu_mutex_lock_iothread();
2580 qemu_flush_coalesced_mmio_buffer();
2581 if (unlocked) {
2582 qemu_mutex_unlock_iothread();
2586 return release_lock;
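/* prepare_mmio_access() takes the global "iothread" mutex before
 * dispatching into device code, unless the region opted out of it via
 * mr->global_locking, and flushes any batched coalesced MMIO so the device
 * sees writes in order.  The caller drops the lock again when it is done
 * with the region, guided by the returned release_lock flag.
 */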
2589 /* Called within RCU critical section. */
2590 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2591 MemTxAttrs attrs,
2592 const uint8_t *buf,
2593 int len, hwaddr addr1,
2594 hwaddr l, MemoryRegion *mr)
2596 uint8_t *ptr;
2597 uint64_t val;
2598 MemTxResult result = MEMTX_OK;
2599 bool release_lock = false;
2601 for (;;) {
2602 if (!memory_access_is_direct(mr, true)) {
2603 release_lock |= prepare_mmio_access(mr);
2604 l = memory_access_size(mr, l, addr1);
2605 /* XXX: could force current_cpu to NULL to avoid
2606 potential bugs */
2607 switch (l) {
2608 case 8:
2609 /* 64 bit write access */
2610 val = ldq_p(buf);
2611 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2612 attrs);
2613 break;
2614 case 4:
2615 /* 32 bit write access */
2616 val = ldl_p(buf);
2617 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2618 attrs);
2619 break;
2620 case 2:
2621 /* 16 bit write access */
2622 val = lduw_p(buf);
2623 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2624 attrs);
2625 break;
2626 case 1:
2627 /* 8 bit write access */
2628 val = ldub_p(buf);
2629 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2630 attrs);
2631 break;
2632 default:
2633 abort();
2635 } else {
2636 /* RAM case */
2637 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2638 memcpy(ptr, buf, l);
2639 invalidate_and_set_dirty(mr, addr1, l);
2642 if (release_lock) {
2643 qemu_mutex_unlock_iothread();
2644 release_lock = false;
2647 len -= l;
2648 buf += l;
2649 addr += l;
2651 if (!len) {
2652 break;
2655 l = len;
2656 mr = address_space_translate(as, addr, &addr1, &l, true);
2659 return result;
2662 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2663 const uint8_t *buf, int len)
2665 hwaddr l;
2666 hwaddr addr1;
2667 MemoryRegion *mr;
2668 MemTxResult result = MEMTX_OK;
2670 if (len > 0) {
2671 rcu_read_lock();
2672 l = len;
2673 mr = address_space_translate(as, addr, &addr1, &l, true);
2674 result = address_space_write_continue(as, addr, attrs, buf, len,
2675 addr1, l, mr);
2676 rcu_read_unlock();
2679 return result;
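/* A minimal caller-side sketch (GUEST_ADDR is a placeholder for a guest
 * physical address):
 *
 *     uint8_t buf[4] = { 0xde, 0xad, 0xbe, 0xef };
 *     MemTxResult r = address_space_write(&address_space_memory, GUEST_ADDR,
 *                                         MEMTXATTRS_UNSPECIFIED, buf,
 *                                         sizeof(buf));
 *     if (r != MEMTX_OK) {
 *         (part of the access hit a device or bus that signalled an error)
 *     }
 */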
2682 /* Called within RCU critical section. */
2683 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2684 MemTxAttrs attrs, uint8_t *buf,
2685 int len, hwaddr addr1, hwaddr l,
2686 MemoryRegion *mr)
2688 uint8_t *ptr;
2689 uint64_t val;
2690 MemTxResult result = MEMTX_OK;
2691 bool release_lock = false;
2693 for (;;) {
2694 if (!memory_access_is_direct(mr, false)) {
2695 /* I/O case */
2696 release_lock |= prepare_mmio_access(mr);
2697 l = memory_access_size(mr, l, addr1);
2698 switch (l) {
2699 case 8:
2700 /* 64 bit read access */
2701 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2702 attrs);
2703 stq_p(buf, val);
2704 break;
2705 case 4:
2706 /* 32 bit read access */
2707 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2708 attrs);
2709 stl_p(buf, val);
2710 break;
2711 case 2:
2712 /* 16 bit read access */
2713 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2714 attrs);
2715 stw_p(buf, val);
2716 break;
2717 case 1:
2718 /* 8 bit read access */
2719 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2720 attrs);
2721 stb_p(buf, val);
2722 break;
2723 default:
2724 abort();
2726 } else {
2727 /* RAM case */
2728 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2729 memcpy(buf, ptr, l);
2732 if (release_lock) {
2733 qemu_mutex_unlock_iothread();
2734 release_lock = false;
2737 len -= l;
2738 buf += l;
2739 addr += l;
2741 if (!len) {
2742 break;
2745 l = len;
2746 mr = address_space_translate(as, addr, &addr1, &l, false);
2749 return result;
2752 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2753 MemTxAttrs attrs, uint8_t *buf, int len)
2755 hwaddr l;
2756 hwaddr addr1;
2757 MemoryRegion *mr;
2758 MemTxResult result = MEMTX_OK;
2760 if (len > 0) {
2761 rcu_read_lock();
2762 l = len;
2763 mr = address_space_translate(as, addr, &addr1, &l, false);
2764 result = address_space_read_continue(as, addr, attrs, buf, len,
2765 addr1, l, mr);
2766 rcu_read_unlock();
2769 return result;
2772 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2773 uint8_t *buf, int len, bool is_write)
2775 if (is_write) {
2776 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2777 } else {
2778 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2782 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2783 int len, int is_write)
2785 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2786 buf, len, is_write);
2789 enum write_rom_type {
2790 WRITE_DATA,
2791 FLUSH_CACHE,
2794 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2795 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2797 hwaddr l;
2798 uint8_t *ptr;
2799 hwaddr addr1;
2800 MemoryRegion *mr;
2802 rcu_read_lock();
2803 while (len > 0) {
2804 l = len;
2805 mr = address_space_translate(as, addr, &addr1, &l, true);
2807 if (!(memory_region_is_ram(mr) ||
2808 memory_region_is_romd(mr))) {
2809 l = memory_access_size(mr, l, addr1);
2810 } else {
2811 /* ROM/RAM case */
2812 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2813 switch (type) {
2814 case WRITE_DATA:
2815 memcpy(ptr, buf, l);
2816 invalidate_and_set_dirty(mr, addr1, l);
2817 break;
2818 case FLUSH_CACHE:
2819 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2820 break;
2823 len -= l;
2824 buf += l;
2825 addr += l;
2827 rcu_read_unlock();
2830 /* used for ROM loading: can write in RAM and ROM */
2831 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2832 const uint8_t *buf, int len)
2834 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2837 void cpu_flush_icache_range(hwaddr start, int len)
2840 * This function should do the same thing as an icache flush that was
2841 * triggered from within the guest. For TCG we are always cache coherent,
2842 * so there is no need to flush anything. For KVM / Xen we need to flush
2843 * the host's instruction cache at least.
2845 if (tcg_enabled()) {
2846 return;
2849 cpu_physical_memory_write_rom_internal(&address_space_memory,
2850 start, NULL, len, FLUSH_CACHE);
2853 typedef struct {
2854 MemoryRegion *mr;
2855 void *buffer;
2856 hwaddr addr;
2857 hwaddr len;
2858 bool in_use;
2859 } BounceBuffer;
2861 static BounceBuffer bounce;
2863 typedef struct MapClient {
2864 QEMUBH *bh;
2865 QLIST_ENTRY(MapClient) link;
2866 } MapClient;
2868 QemuMutex map_client_list_lock;
2869 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2870 = QLIST_HEAD_INITIALIZER(map_client_list);
2872 static void cpu_unregister_map_client_do(MapClient *client)
2874 QLIST_REMOVE(client, link);
2875 g_free(client);
2878 static void cpu_notify_map_clients_locked(void)
2880 MapClient *client;
2882 while (!QLIST_EMPTY(&map_client_list)) {
2883 client = QLIST_FIRST(&map_client_list);
2884 qemu_bh_schedule(client->bh);
2885 cpu_unregister_map_client_do(client);
2889 void cpu_register_map_client(QEMUBH *bh)
2891 MapClient *client = g_malloc(sizeof(*client));
2893 qemu_mutex_lock(&map_client_list_lock);
2894 client->bh = bh;
2895 QLIST_INSERT_HEAD(&map_client_list, client, link);
2896 if (!atomic_read(&bounce.in_use)) {
2897 cpu_notify_map_clients_locked();
2899 qemu_mutex_unlock(&map_client_list_lock);
2902 void cpu_exec_init_all(void)
2904 qemu_mutex_init(&ram_list.mutex);
2905 /* The data structures we set up here depend on knowing the page size,
2906 * so no more changes can be made after this point.
2907 * In an ideal world, nothing we did before we had finished the
2908 * machine setup would care about the target page size, and we could
2909 * do this much later, rather than requiring board models to state
2910 * up front what their requirements are.
2912 finalize_target_page_bits();
2913 io_mem_init();
2914 memory_map_init();
2915 qemu_mutex_init(&map_client_list_lock);
2918 void cpu_unregister_map_client(QEMUBH *bh)
2920 MapClient *client;
2922 qemu_mutex_lock(&map_client_list_lock);
2923 QLIST_FOREACH(client, &map_client_list, link) {
2924 if (client->bh == bh) {
2925 cpu_unregister_map_client_do(client);
2926 break;
2929 qemu_mutex_unlock(&map_client_list_lock);
2932 static void cpu_notify_map_clients(void)
2934 qemu_mutex_lock(&map_client_list_lock);
2935 cpu_notify_map_clients_locked();
2936 qemu_mutex_unlock(&map_client_list_lock);
2939 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2941 MemoryRegion *mr;
2942 hwaddr l, xlat;
2944 rcu_read_lock();
2945 while (len > 0) {
2946 l = len;
2947 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2948 if (!memory_access_is_direct(mr, is_write)) {
2949 l = memory_access_size(mr, l, addr);
2950 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2951 return false;
2955 len -= l;
2956 addr += l;
2958 rcu_read_unlock();
2959 return true;
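/* Note that the "return false" path above leaves the function without
 * reaching rcu_read_unlock(), i.e. the check can fail while the read-side
 * critical section opened at the top is still held.
 */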
2962 /* Map a physical memory region into a host virtual address.
2963 * May map a subset of the requested range, given by and returned in *plen.
2964 * May return NULL if resources needed to perform the mapping are exhausted.
2965 * Use only for reads OR writes - not for read-modify-write operations.
2966 * Use cpu_register_map_client() to know when retrying the map operation is
2967 * likely to succeed.
2969 void *address_space_map(AddressSpace *as,
2970 hwaddr addr,
2971 hwaddr *plen,
2972 bool is_write)
2974 hwaddr len = *plen;
2975 hwaddr done = 0;
2976 hwaddr l, xlat, base;
2977 MemoryRegion *mr, *this_mr;
2978 void *ptr;
2980 if (len == 0) {
2981 return NULL;
2984 l = len;
2985 rcu_read_lock();
2986 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2988 if (!memory_access_is_direct(mr, is_write)) {
2989 if (atomic_xchg(&bounce.in_use, true)) {
2990 rcu_read_unlock();
2991 return NULL;
2993 /* Avoid unbounded allocations */
2994 l = MIN(l, TARGET_PAGE_SIZE);
2995 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2996 bounce.addr = addr;
2997 bounce.len = l;
2999 memory_region_ref(mr);
3000 bounce.mr = mr;
3001 if (!is_write) {
3002 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
3003 bounce.buffer, l);
3006 rcu_read_unlock();
3007 *plen = l;
3008 return bounce.buffer;
3011 base = xlat;
3013 for (;;) {
3014 len -= l;
3015 addr += l;
3016 done += l;
3017 if (len == 0) {
3018 break;
3021 l = len;
3022 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3023 if (this_mr != mr || xlat != base + done) {
3024 break;
3028 memory_region_ref(mr);
3029 *plen = done;
3030 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
3031 rcu_read_unlock();
3033 return ptr;
3036 /* Unmaps a memory region previously mapped by address_space_map().
3037 * Will also mark the memory as dirty if is_write == 1. access_len gives
3038 * the amount of memory that was actually read or written by the caller.
3040 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3041 int is_write, hwaddr access_len)
3043 if (buffer != bounce.buffer) {
3044 MemoryRegion *mr;
3045 ram_addr_t addr1;
3047 mr = memory_region_from_host(buffer, &addr1);
3048 assert(mr != NULL);
3049 if (is_write) {
3050 invalidate_and_set_dirty(mr, addr1, access_len);
3052 if (xen_enabled()) {
3053 xen_invalidate_map_cache_entry(buffer);
3055 memory_region_unref(mr);
3056 return;
3058 if (is_write) {
3059 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3060 bounce.buffer, access_len);
3062 qemu_vfree(bounce.buffer);
3063 bounce.buffer = NULL;
3064 memory_region_unref(bounce.mr);
3065 atomic_mb_set(&bounce.in_use, false);
3066 cpu_notify_map_clients();
3069 void *cpu_physical_memory_map(hwaddr addr,
3070 hwaddr *plen,
3071 int is_write)
3073 return address_space_map(&address_space_memory, addr, plen, is_write);
3076 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3077 int is_write, hwaddr access_len)
3079 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
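/* A typical zero-copy DMA pattern built on the two wrappers above
 * (illustrative sketch; dma_addr and dma_len are placeholders):
 *
 *     hwaddr plen = dma_len;
 *     void *p = address_space_map(&address_space_memory, dma_addr,
 *                                 &plen, true);
 *     if (p) {
 *         memset(p, 0, plen);   (touch at most plen bytes, which may be
 *                                less than dma_len)
 *         address_space_unmap(&address_space_memory, p, plen, true, plen);
 *     } else {
 *         (resources exhausted, e.g. the bounce buffer is in use: register
 *          a map client with cpu_register_map_client() and retry later)
 *     }
 */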
3082 /* warning: addr must be aligned */
3083 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3084 MemTxAttrs attrs,
3085 MemTxResult *result,
3086 enum device_endian endian)
3088 uint8_t *ptr;
3089 uint64_t val;
3090 MemoryRegion *mr;
3091 hwaddr l = 4;
3092 hwaddr addr1;
3093 MemTxResult r;
3094 bool release_lock = false;
3096 rcu_read_lock();
3097 mr = address_space_translate(as, addr, &addr1, &l, false);
3098 if (l < 4 || !memory_access_is_direct(mr, false)) {
3099 release_lock |= prepare_mmio_access(mr);
3101 /* I/O case */
3102 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3103 #if defined(TARGET_WORDS_BIGENDIAN)
3104 if (endian == DEVICE_LITTLE_ENDIAN) {
3105 val = bswap32(val);
3107 #else
3108 if (endian == DEVICE_BIG_ENDIAN) {
3109 val = bswap32(val);
3111 #endif
3112 } else {
3113 /* RAM case */
3114 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3115 switch (endian) {
3116 case DEVICE_LITTLE_ENDIAN:
3117 val = ldl_le_p(ptr);
3118 break;
3119 case DEVICE_BIG_ENDIAN:
3120 val = ldl_be_p(ptr);
3121 break;
3122 default:
3123 val = ldl_p(ptr);
3124 break;
3126 r = MEMTX_OK;
3128 if (result) {
3129 *result = r;
3131 if (release_lock) {
3132 qemu_mutex_unlock_iothread();
3134 rcu_read_unlock();
3135 return val;
3138 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3139 MemTxAttrs attrs, MemTxResult *result)
3141 return address_space_ldl_internal(as, addr, attrs, result,
3142 DEVICE_NATIVE_ENDIAN);
3145 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3146 MemTxAttrs attrs, MemTxResult *result)
3148 return address_space_ldl_internal(as, addr, attrs, result,
3149 DEVICE_LITTLE_ENDIAN);
3152 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3153 MemTxAttrs attrs, MemTxResult *result)
3155 return address_space_ldl_internal(as, addr, attrs, result,
3156 DEVICE_BIG_ENDIAN);
3159 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3161 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3164 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3166 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3169 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3171 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3174 /* warning: addr must be aligned */
3175 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3176 MemTxAttrs attrs,
3177 MemTxResult *result,
3178 enum device_endian endian)
3180 uint8_t *ptr;
3181 uint64_t val;
3182 MemoryRegion *mr;
3183 hwaddr l = 8;
3184 hwaddr addr1;
3185 MemTxResult r;
3186 bool release_lock = false;
3188 rcu_read_lock();
3189 mr = address_space_translate(as, addr, &addr1, &l,
3190 false);
3191 if (l < 8 || !memory_access_is_direct(mr, false)) {
3192 release_lock |= prepare_mmio_access(mr);
3194 /* I/O case */
3195 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3196 #if defined(TARGET_WORDS_BIGENDIAN)
3197 if (endian == DEVICE_LITTLE_ENDIAN) {
3198 val = bswap64(val);
3200 #else
3201 if (endian == DEVICE_BIG_ENDIAN) {
3202 val = bswap64(val);
3204 #endif
3205 } else {
3206 /* RAM case */
3207 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3208 switch (endian) {
3209 case DEVICE_LITTLE_ENDIAN:
3210 val = ldq_le_p(ptr);
3211 break;
3212 case DEVICE_BIG_ENDIAN:
3213 val = ldq_be_p(ptr);
3214 break;
3215 default:
3216 val = ldq_p(ptr);
3217 break;
3219 r = MEMTX_OK;
3221 if (result) {
3222 *result = r;
3224 if (release_lock) {
3225 qemu_mutex_unlock_iothread();
3227 rcu_read_unlock();
3228 return val;
3231 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3232 MemTxAttrs attrs, MemTxResult *result)
3234 return address_space_ldq_internal(as, addr, attrs, result,
3235 DEVICE_NATIVE_ENDIAN);
3238 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3239 MemTxAttrs attrs, MemTxResult *result)
3241 return address_space_ldq_internal(as, addr, attrs, result,
3242 DEVICE_LITTLE_ENDIAN);
3245 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3246 MemTxAttrs attrs, MemTxResult *result)
3248 return address_space_ldq_internal(as, addr, attrs, result,
3249 DEVICE_BIG_ENDIAN);
3252 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3254 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3257 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3259 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3262 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3264 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3267 /* XXX: optimize */
3268 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3269 MemTxAttrs attrs, MemTxResult *result)
3271 uint8_t val;
3272 MemTxResult r;
3274 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3275 if (result) {
3276 *result = r;
3278 return val;
3281 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3283 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3286 /* warning: addr must be aligned */
3287 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3288 hwaddr addr,
3289 MemTxAttrs attrs,
3290 MemTxResult *result,
3291 enum device_endian endian)
3293 uint8_t *ptr;
3294 uint64_t val;
3295 MemoryRegion *mr;
3296 hwaddr l = 2;
3297 hwaddr addr1;
3298 MemTxResult r;
3299 bool release_lock = false;
3301 rcu_read_lock();
3302 mr = address_space_translate(as, addr, &addr1, &l,
3303 false);
3304 if (l < 2 || !memory_access_is_direct(mr, false)) {
3305 release_lock |= prepare_mmio_access(mr);
3307 /* I/O case */
3308 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3309 #if defined(TARGET_WORDS_BIGENDIAN)
3310 if (endian == DEVICE_LITTLE_ENDIAN) {
3311 val = bswap16(val);
3313 #else
3314 if (endian == DEVICE_BIG_ENDIAN) {
3315 val = bswap16(val);
3317 #endif
3318 } else {
3319 /* RAM case */
3320 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3321 switch (endian) {
3322 case DEVICE_LITTLE_ENDIAN:
3323 val = lduw_le_p(ptr);
3324 break;
3325 case DEVICE_BIG_ENDIAN:
3326 val = lduw_be_p(ptr);
3327 break;
3328 default:
3329 val = lduw_p(ptr);
3330 break;
3332 r = MEMTX_OK;
3334 if (result) {
3335 *result = r;
3337 if (release_lock) {
3338 qemu_mutex_unlock_iothread();
3340 rcu_read_unlock();
3341 return val;
3344 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3345 MemTxAttrs attrs, MemTxResult *result)
3347 return address_space_lduw_internal(as, addr, attrs, result,
3348 DEVICE_NATIVE_ENDIAN);
3351 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3352 MemTxAttrs attrs, MemTxResult *result)
3354 return address_space_lduw_internal(as, addr, attrs, result,
3355 DEVICE_LITTLE_ENDIAN);
3358 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3359 MemTxAttrs attrs, MemTxResult *result)
3361 return address_space_lduw_internal(as, addr, attrs, result,
3362 DEVICE_BIG_ENDIAN);
3365 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3367 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3370 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3372 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3375 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3377 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3380 /* warning: addr must be aligned. The ram page is not marked as dirty
3381 and the code inside is not invalidated. It is useful if the dirty
3382 bits are used to track modified PTEs */
3383 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3384 MemTxAttrs attrs, MemTxResult *result)
3386 uint8_t *ptr;
3387 MemoryRegion *mr;
3388 hwaddr l = 4;
3389 hwaddr addr1;
3390 MemTxResult r;
3391 uint8_t dirty_log_mask;
3392 bool release_lock = false;
3394 rcu_read_lock();
3395 mr = address_space_translate(as, addr, &addr1, &l,
3396 true);
3397 if (l < 4 || !memory_access_is_direct(mr, true)) {
3398 release_lock |= prepare_mmio_access(mr);
3400 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3401 } else {
3402 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3403 stl_p(ptr, val);
3405 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3406 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3407 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3408 4, dirty_log_mask);
3409 r = MEMTX_OK;
3411 if (result) {
3412 *result = r;
3414 if (release_lock) {
3415 qemu_mutex_unlock_iothread();
3417 rcu_read_unlock();
3420 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3422 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
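/* The "notdirty" store above is intended for guest pages that hold page
 * table entries rather than code: DIRTY_MEMORY_CODE is masked out before
 * the dirty-range update, so no translated code gets invalidated, while
 * the VGA and migration clients still see the page as written.
 */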
3425 /* warning: addr must be aligned */
3426 static inline void address_space_stl_internal(AddressSpace *as,
3427 hwaddr addr, uint32_t val,
3428 MemTxAttrs attrs,
3429 MemTxResult *result,
3430 enum device_endian endian)
3432 uint8_t *ptr;
3433 MemoryRegion *mr;
3434 hwaddr l = 4;
3435 hwaddr addr1;
3436 MemTxResult r;
3437 bool release_lock = false;
3439 rcu_read_lock();
3440 mr = address_space_translate(as, addr, &addr1, &l,
3441 true);
3442 if (l < 4 || !memory_access_is_direct(mr, true)) {
3443 release_lock |= prepare_mmio_access(mr);
3445 #if defined(TARGET_WORDS_BIGENDIAN)
3446 if (endian == DEVICE_LITTLE_ENDIAN) {
3447 val = bswap32(val);
3449 #else
3450 if (endian == DEVICE_BIG_ENDIAN) {
3451 val = bswap32(val);
3453 #endif
3454 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3455 } else {
3456 /* RAM case */
3457 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3458 switch (endian) {
3459 case DEVICE_LITTLE_ENDIAN:
3460 stl_le_p(ptr, val);
3461 break;
3462 case DEVICE_BIG_ENDIAN:
3463 stl_be_p(ptr, val);
3464 break;
3465 default:
3466 stl_p(ptr, val);
3467 break;
3469 invalidate_and_set_dirty(mr, addr1, 4);
3470 r = MEMTX_OK;
3472 if (result) {
3473 *result = r;
3475 if (release_lock) {
3476 qemu_mutex_unlock_iothread();
3478 rcu_read_unlock();
3481 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3482 MemTxAttrs attrs, MemTxResult *result)
3484 address_space_stl_internal(as, addr, val, attrs, result,
3485 DEVICE_NATIVE_ENDIAN);
3488 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3489 MemTxAttrs attrs, MemTxResult *result)
3491 address_space_stl_internal(as, addr, val, attrs, result,
3492 DEVICE_LITTLE_ENDIAN);
3495 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3496 MemTxAttrs attrs, MemTxResult *result)
3498 address_space_stl_internal(as, addr, val, attrs, result,
3499 DEVICE_BIG_ENDIAN);
3502 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3504 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3507 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3509 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3512 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3514 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3517 /* XXX: optimize */
3518 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3519 MemTxAttrs attrs, MemTxResult *result)
3521 uint8_t v = val;
3522 MemTxResult r;
3524 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3525 if (result) {
3526 *result = r;
3530 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3532 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3535 /* warning: addr must be aligned */
3536 static inline void address_space_stw_internal(AddressSpace *as,
3537 hwaddr addr, uint32_t val,
3538 MemTxAttrs attrs,
3539 MemTxResult *result,
3540 enum device_endian endian)
3542 uint8_t *ptr;
3543 MemoryRegion *mr;
3544 hwaddr l = 2;
3545 hwaddr addr1;
3546 MemTxResult r;
3547 bool release_lock = false;
3549 rcu_read_lock();
3550 mr = address_space_translate(as, addr, &addr1, &l, true);
3551 if (l < 2 || !memory_access_is_direct(mr, true)) {
3552 release_lock |= prepare_mmio_access(mr);
3554 #if defined(TARGET_WORDS_BIGENDIAN)
3555 if (endian == DEVICE_LITTLE_ENDIAN) {
3556 val = bswap16(val);
3558 #else
3559 if (endian == DEVICE_BIG_ENDIAN) {
3560 val = bswap16(val);
3562 #endif
3563 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3564 } else {
3565 /* RAM case */
3566 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3567 switch (endian) {
3568 case DEVICE_LITTLE_ENDIAN:
3569 stw_le_p(ptr, val);
3570 break;
3571 case DEVICE_BIG_ENDIAN:
3572 stw_be_p(ptr, val);
3573 break;
3574 default:
3575 stw_p(ptr, val);
3576 break;
3578 invalidate_and_set_dirty(mr, addr1, 2);
3579 r = MEMTX_OK;
3581 if (result) {
3582 *result = r;
3584 if (release_lock) {
3585 qemu_mutex_unlock_iothread();
3587 rcu_read_unlock();
3590 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3591 MemTxAttrs attrs, MemTxResult *result)
3593 address_space_stw_internal(as, addr, val, attrs, result,
3594 DEVICE_NATIVE_ENDIAN);
3597 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3598 MemTxAttrs attrs, MemTxResult *result)
3600 address_space_stw_internal(as, addr, val, attrs, result,
3601 DEVICE_LITTLE_ENDIAN);
3604 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3605 MemTxAttrs attrs, MemTxResult *result)
3607 address_space_stw_internal(as, addr, val, attrs, result,
3608 DEVICE_BIG_ENDIAN);
3611 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3613 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3616 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3618 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3621 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3623 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3626 /* XXX: optimize */
3627 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3628 MemTxAttrs attrs, MemTxResult *result)
3630 MemTxResult r;
3631 val = tswap64(val);
3632 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3633 if (result) {
3634 *result = r;
3638 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3639 MemTxAttrs attrs, MemTxResult *result)
3641 MemTxResult r;
3642 val = cpu_to_le64(val);
3643 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3644 if (result) {
3645 *result = r;
3648 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3649 MemTxAttrs attrs, MemTxResult *result)
3651 MemTxResult r;
3652 val = cpu_to_be64(val);
3653 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3654 if (result) {
3655 *result = r;
3659 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3661 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3664 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3666 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3669 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3671 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3674 /* virtual memory access for debug (includes writing to ROM) */
3675 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3676 uint8_t *buf, int len, int is_write)
3678 int l;
3679 hwaddr phys_addr;
3680 target_ulong page;
3682 while (len > 0) {
3683 int asidx;
3684 MemTxAttrs attrs;
3686 page = addr & TARGET_PAGE_MASK;
3687 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3688 asidx = cpu_asidx_from_attrs(cpu, attrs);
3689 /* if no physical page is mapped, return an error */
3690 if (phys_addr == -1)
3691 return -1;
3692 l = (page + TARGET_PAGE_SIZE) - addr;
3693 if (l > len)
3694 l = len;
3695 phys_addr += (addr & ~TARGET_PAGE_MASK);
3696 if (is_write) {
3697 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3698 phys_addr, buf, l);
3699 } else {
3700 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3701 MEMTXATTRS_UNSPECIFIED,
3702 buf, l, 0);
3704 len -= l;
3705 buf += l;
3706 addr += l;
3708 return 0;
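/* The debug accessor above works one guest page at a time: each virtual
 * page is translated with cpu_get_phys_page_attrs_debug() and the address
 * space matching the returned attributes is used.  Writes go through
 * cpu_physical_memory_write_rom() so a debugger (e.g. the gdbstub) can
 * patch breakpoints even into ROM-backed memory.
 */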
3712 * Allows code that needs to deal with migration bitmaps etc. to still be built
3713 * target-independent.
3715 size_t qemu_target_page_bits(void)
3717 return TARGET_PAGE_BITS;
3720 #endif
3723 * A helper function for the _utterly broken_ virtio device model to find out if
3724 * it's running on a big endian machine. Don't do this at home kids!
3726 bool target_words_bigendian(void);
3727 bool target_words_bigendian(void)
3729 #if defined(TARGET_WORDS_BIGENDIAN)
3730 return true;
3731 #else
3732 return false;
3733 #endif
3736 #ifndef CONFIG_USER_ONLY
3737 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3739 MemoryRegion *mr;
3740 hwaddr l = 1;
3741 bool res;
3743 rcu_read_lock();
3744 mr = address_space_translate(&address_space_memory,
3745 phys_addr, &phys_addr, &l, false);
3747 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3748 rcu_read_unlock();
3749 return res;
3752 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3754 RAMBlock *block;
3755 int ret = 0;
3757 rcu_read_lock();
3758 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3759 ret = func(block->idstr, block->host, block->offset,
3760 block->used_length, opaque);
3761 if (ret) {
3762 break;
3765 rcu_read_unlock();
3766 return ret;
3768 #endif