Merge remote-tracking branch 'stweil/hax-new'
[qemu/ar7.git] / exec.c
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/hax.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include "qemu.h"
41 #else /* !CONFIG_USER_ONLY */
42 #include "hw/hw.h"
43 #include "exec/memory.h"
44 #include "exec/ioport.h"
45 #include "sysemu/dma.h"
46 #include "exec/address-spaces.h"
47 #include "sysemu/xen-mapcache.h"
48 #include "trace.h"
49 #endif
50 #include "exec/cpu-all.h"
51 #include "qemu/rcu_queue.h"
52 #include "qemu/main-loop.h"
53 #include "translate-all.h"
54 #include "sysemu/replay.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
58 #include "exec/log.h"
60 #include "migration/vmstate.h"
62 #include "qemu/range.h"
63 #ifndef _WIN32
64 #include "qemu/mmap-alloc.h"
65 #endif
67 //#define DEBUG_SUBPAGE
69 #if !defined(CONFIG_USER_ONLY)
70 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
71 * are protected by the ramlist lock.
73 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
75 static MemoryRegion *system_memory;
76 static MemoryRegion *system_io;
78 AddressSpace address_space_io;
79 AddressSpace address_space_memory;
81 MemoryRegion io_mem_rom, io_mem_notdirty;
82 static MemoryRegion io_mem_unassigned;
84 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
85 #define RAM_PREALLOC (1 << 0)
87 /* RAM is mmap-ed with MAP_SHARED */
88 #define RAM_SHARED (1 << 1)
90 /* Only a portion of RAM (used_length) is actually used, and migrated.
91 * This used_length size can change across reboots.
93 #define RAM_RESIZEABLE (1 << 2)
95 #endif
97 #ifdef TARGET_PAGE_BITS_VARY
98 int target_page_bits;
99 bool target_page_bits_decided;
100 #endif
102 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
103 /* current CPU in the current thread. It is only valid inside
104 cpu_exec() */
105 __thread CPUState *current_cpu;
106 /* 0 = Do not count executed instructions.
107 1 = Precise instruction counting.
108 2 = Adaptive rate instruction counting. */
109 int use_icount;
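/* For reference: use_icount is normally configured from the "-icount"
 * command line option; a fixed shift value selects precise counting (1)
 * and "auto" selects adaptive rate counting (2). */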
111 bool set_preferred_target_page_bits(int bits)
113 /* The target page size is the lowest common denominator for all
114 * the CPUs in the system, so we can only make it smaller, never
115 * larger. And we can't make it smaller once we've committed to
116 * a particular size.
118 #ifdef TARGET_PAGE_BITS_VARY
119 assert(bits >= TARGET_PAGE_BITS_MIN);
120 if (target_page_bits == 0 || target_page_bits > bits) {
121 if (target_page_bits_decided) {
122 return false;
124 target_page_bits = bits;
126 #endif
127 return true;
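/* Illustrative usage, not taken from this file: a target built with
 * TARGET_PAGE_BITS_VARY would call e.g. set_preferred_target_page_bits(12)
 * while realizing its CPUs, i.e. before finalize_target_page_bits() below
 * sets target_page_bits_decided. */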
130 #if !defined(CONFIG_USER_ONLY)
132 static void finalize_target_page_bits(void)
134 #ifdef TARGET_PAGE_BITS_VARY
135 if (target_page_bits == 0) {
136 target_page_bits = TARGET_PAGE_BITS_MIN;
138 target_page_bits_decided = true;
139 #endif
142 typedef struct PhysPageEntry PhysPageEntry;
144 struct PhysPageEntry {
145     /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
146 uint32_t skip : 6;
147 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
148 uint32_t ptr : 26;
151 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
153 /* Size of the L2 (and L3, etc) page tables. */
154 #define ADDR_SPACE_BITS 64
156 #define P_L2_BITS 9
157 #define P_L2_SIZE (1 << P_L2_BITS)
159 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
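/* For example, with 64-bit physical addresses and 4 KiB target pages
 * (TARGET_PAGE_BITS == 12) this gives ((64 - 12 - 1) / 9) + 1 = 6 levels,
 * each level resolving P_L2_BITS == 9 bits of the page index. */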
161 typedef PhysPageEntry Node[P_L2_SIZE];
163 typedef struct PhysPageMap {
164 struct rcu_head rcu;
166 unsigned sections_nb;
167 unsigned sections_nb_alloc;
168 unsigned nodes_nb;
169 unsigned nodes_nb_alloc;
170 Node *nodes;
171 MemoryRegionSection *sections;
172 } PhysPageMap;
174 struct AddressSpaceDispatch {
175 struct rcu_head rcu;
177 MemoryRegionSection *mru_section;
178 /* This is a multi-level map on the physical address space.
179 * The bottom level has pointers to MemoryRegionSections.
181 PhysPageEntry phys_map;
182 PhysPageMap map;
183 AddressSpace *as;
186 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
187 typedef struct subpage_t {
188 MemoryRegion iomem;
189 AddressSpace *as;
190 hwaddr base;
191 uint16_t sub_section[];
192 } subpage_t;
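/* A subpage_t models a single target page whose bytes are backed by more
 * than one MemoryRegionSection: sub_section[] holds one section index per
 * byte offset within the page, looked up via SUBPAGE_IDX(addr) and filled
 * in by register_subpage()/subpage_register() below. */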
194 #define PHYS_SECTION_UNASSIGNED 0
195 #define PHYS_SECTION_NOTDIRTY 1
196 #define PHYS_SECTION_ROM 2
197 #define PHYS_SECTION_WATCH 3
199 static void io_mem_init(void);
200 static void memory_map_init(void);
201 static void tcg_commit(MemoryListener *listener);
203 static MemoryRegion io_mem_watch;
206 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
207 * @cpu: the CPU whose AddressSpace this is
208 * @as: the AddressSpace itself
209 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
210 * @tcg_as_listener: listener for tracking changes to the AddressSpace
212 struct CPUAddressSpace {
213 CPUState *cpu;
214 AddressSpace *as;
215 struct AddressSpaceDispatch *memory_dispatch;
216 MemoryListener tcg_as_listener;
219 #endif
221 #if !defined(CONFIG_USER_ONLY)
223 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
225 static unsigned alloc_hint = 16;
226 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
227 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
228 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
229 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
230 alloc_hint = map->nodes_nb_alloc;
234 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
236 unsigned i;
237 uint32_t ret;
238 PhysPageEntry e;
239 PhysPageEntry *p;
241 ret = map->nodes_nb++;
242 p = map->nodes[ret];
243 assert(ret != PHYS_MAP_NODE_NIL);
244 assert(ret != map->nodes_nb_alloc);
246 e.skip = leaf ? 0 : 1;
247 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
248 for (i = 0; i < P_L2_SIZE; ++i) {
249 memcpy(&p[i], &e, sizeof(e));
251 return ret;
254 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
255 hwaddr *index, hwaddr *nb, uint16_t leaf,
256 int level)
258 PhysPageEntry *p;
259 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
261 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
262 lp->ptr = phys_map_node_alloc(map, level == 0);
264 p = map->nodes[lp->ptr];
265 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
267 while (*nb && lp < &p[P_L2_SIZE]) {
268 if ((*index & (step - 1)) == 0 && *nb >= step) {
269 lp->skip = 0;
270 lp->ptr = leaf;
271 *index += step;
272 *nb -= step;
273 } else {
274 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
276 ++lp;
280 static void phys_page_set(AddressSpaceDispatch *d,
281 hwaddr index, hwaddr nb,
282 uint16_t leaf)
284 /* Wildly overreserve - it doesn't matter much. */
285 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
287 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
290 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
291 * and update our entry so we can skip it and go directly to the destination.
293 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
295 unsigned valid_ptr = P_L2_SIZE;
296 int valid = 0;
297 PhysPageEntry *p;
298 int i;
300 if (lp->ptr == PHYS_MAP_NODE_NIL) {
301 return;
304 p = nodes[lp->ptr];
305 for (i = 0; i < P_L2_SIZE; i++) {
306 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
307 continue;
310 valid_ptr = i;
311 valid++;
312 if (p[i].skip) {
313 phys_page_compact(&p[i], nodes);
317 /* We can only compress if there's only one child. */
318 if (valid != 1) {
319 return;
322 assert(valid_ptr < P_L2_SIZE);
324 /* Don't compress if it won't fit in the # of bits we have. */
325 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
326 return;
329 lp->ptr = p[valid_ptr].ptr;
330 if (!p[valid_ptr].skip) {
331 /* If our only child is a leaf, make this a leaf. */
332 /* By design, we should have made this node a leaf to begin with so we
333 * should never reach here.
334 * But since it's so simple to handle this, let's do it just in case we
335 * change this rule.
337 lp->skip = 0;
338 } else {
339 lp->skip += p[valid_ptr].skip;
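/* Example: if a node's single valid child itself skips two levels, the
 * parent entry now points directly at the grandchild's table with its skip
 * count increased accordingly, so phys_page_find() can walk fewer levels
 * for sparse address spaces. */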
343 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
345 if (d->phys_map.skip) {
346 phys_page_compact(&d->phys_map, d->map.nodes);
350 static inline bool section_covers_addr(const MemoryRegionSection *section,
351 hwaddr addr)
353 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
354 * the section must cover the entire address space.
356 return int128_gethi(section->size) ||
357 range_covers_byte(section->offset_within_address_space,
358 int128_getlo(section->size), addr);
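/* Example: a section spanning the whole 2^64 address space has
 * int128_gethi(size) != 0 and therefore trivially covers every address;
 * any smaller section falls through to the explicit range check. */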
361 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
362 Node *nodes, MemoryRegionSection *sections)
364 PhysPageEntry *p;
365 hwaddr index = addr >> TARGET_PAGE_BITS;
366 int i;
368 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
369 if (lp.ptr == PHYS_MAP_NODE_NIL) {
370 return &sections[PHYS_SECTION_UNASSIGNED];
372 p = nodes[lp.ptr];
373 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
376 if (section_covers_addr(&sections[lp.ptr], addr)) {
377 return &sections[lp.ptr];
378 } else {
379 return &sections[PHYS_SECTION_UNASSIGNED];
383 bool memory_region_is_unassigned(MemoryRegion *mr)
385 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
386 && mr != &io_mem_watch;
389 /* Called from RCU critical section */
390 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
391 hwaddr addr,
392 bool resolve_subpage)
394 MemoryRegionSection *section = atomic_read(&d->mru_section);
395 subpage_t *subpage;
396 bool update;
398 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
399 section_covers_addr(section, addr)) {
400 update = false;
401 } else {
402 section = phys_page_find(d->phys_map, addr, d->map.nodes,
403 d->map.sections);
404 update = true;
406 if (resolve_subpage && section->mr->subpage) {
407 subpage = container_of(section->mr, subpage_t, iomem);
408 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
410 if (update) {
411 atomic_set(&d->mru_section, section);
413 return section;
416 /* Called from RCU critical section */
417 static MemoryRegionSection *
418 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
419 hwaddr *plen, bool resolve_subpage)
421 MemoryRegionSection *section;
422 MemoryRegion *mr;
423 Int128 diff;
425 section = address_space_lookup_region(d, addr, resolve_subpage);
426 /* Compute offset within MemoryRegionSection */
427 addr -= section->offset_within_address_space;
429 /* Compute offset within MemoryRegion */
430 *xlat = addr + section->offset_within_region;
432 mr = section->mr;
434 /* MMIO registers can be expected to perform full-width accesses based only
435 * on their address, without considering adjacent registers that could
436 * decode to completely different MemoryRegions. When such registers
437 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
438 * regions overlap wildly. For this reason we cannot clamp the accesses
439 * here.
441 * If the length is small (as is the case for address_space_ldl/stl),
442 * everything works fine. If the incoming length is large, however,
443 * the caller really has to do the clamping through memory_access_size.
445 if (memory_region_is_ram(mr)) {
446 diff = int128_sub(section->size, int128_make64(addr));
447 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
449 return section;
452 /* Called from RCU critical section */
453 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
454 hwaddr *xlat, hwaddr *plen,
455 bool is_write)
457 IOMMUTLBEntry iotlb;
458 MemoryRegionSection *section;
459 MemoryRegion *mr;
461 for (;;) {
462 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
463 section = address_space_translate_internal(d, addr, &addr, plen, true);
464 mr = section->mr;
466 if (!mr->iommu_ops) {
467 break;
470 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
471 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
472 | (addr & iotlb.addr_mask));
473 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
474 if (!(iotlb.perm & (1 << is_write))) {
475 mr = &io_mem_unassigned;
476 break;
479 as = iotlb.target_as;
482 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
483 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
484 *plen = MIN(page, *plen);
487 *xlat = addr;
488 return mr;
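/* Illustrative sketch of a typical caller (along the lines of
 * address_space_rw); the returned MemoryRegion and *xlat are only valid
 * inside the RCU critical section:
 *
 *     rcu_read_lock();
 *     l = len;
 *     mr = address_space_translate(as, addr, &xlat, &l, is_write);
 *     ... access at most 'l' bytes of 'mr' starting at offset 'xlat' ...
 *     rcu_read_unlock();
 */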
491 /* Called from RCU critical section */
492 MemoryRegionSection *
493 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
494 hwaddr *xlat, hwaddr *plen)
496 MemoryRegionSection *section;
497 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
499 section = address_space_translate_internal(d, addr, xlat, plen, false);
501 assert(!section->mr->iommu_ops);
502 return section;
504 #endif
506 #if !defined(CONFIG_USER_ONLY)
508 static int cpu_common_post_load(void *opaque, int version_id)
510 CPUState *cpu = opaque;
512 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
513 version_id is increased. */
514 cpu->interrupt_request &= ~0x01;
515 tlb_flush(cpu, 1);
517 return 0;
520 static int cpu_common_pre_load(void *opaque)
522 CPUState *cpu = opaque;
524 cpu->exception_index = -1;
526 return 0;
529 static bool cpu_common_exception_index_needed(void *opaque)
531 CPUState *cpu = opaque;
533 return tcg_enabled() && cpu->exception_index != -1;
536 static const VMStateDescription vmstate_cpu_common_exception_index = {
537 .name = "cpu_common/exception_index",
538 .version_id = 1,
539 .minimum_version_id = 1,
540 .needed = cpu_common_exception_index_needed,
541 .fields = (VMStateField[]) {
542 VMSTATE_INT32(exception_index, CPUState),
543 VMSTATE_END_OF_LIST()
547 static bool cpu_common_crash_occurred_needed(void *opaque)
549 CPUState *cpu = opaque;
551 return cpu->crash_occurred;
554 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
555 .name = "cpu_common/crash_occurred",
556 .version_id = 1,
557 .minimum_version_id = 1,
558 .needed = cpu_common_crash_occurred_needed,
559 .fields = (VMStateField[]) {
560 VMSTATE_BOOL(crash_occurred, CPUState),
561 VMSTATE_END_OF_LIST()
565 const VMStateDescription vmstate_cpu_common = {
566 .name = "cpu_common",
567 .version_id = 1,
568 .minimum_version_id = 1,
569 .pre_load = cpu_common_pre_load,
570 .post_load = cpu_common_post_load,
571 .fields = (VMStateField[]) {
572 VMSTATE_UINT32(halted, CPUState),
573 VMSTATE_UINT32(interrupt_request, CPUState),
574 VMSTATE_END_OF_LIST()
576 .subsections = (const VMStateDescription*[]) {
577 &vmstate_cpu_common_exception_index,
578 &vmstate_cpu_common_crash_occurred,
579 NULL
583 #endif
585 CPUState *qemu_get_cpu(int index)
587 CPUState *cpu;
589 CPU_FOREACH(cpu) {
590 if (cpu->cpu_index == index) {
591 return cpu;
595 return NULL;
598 #if !defined(CONFIG_USER_ONLY)
599 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
601 CPUAddressSpace *newas;
603 /* Target code should have set num_ases before calling us */
604 assert(asidx < cpu->num_ases);
606 if (asidx == 0) {
607 /* address space 0 gets the convenience alias */
608 cpu->as = as;
611 /* KVM cannot currently support multiple address spaces. */
612 assert(asidx == 0 || !kvm_enabled());
614 if (!cpu->cpu_ases) {
615 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
618 newas = &cpu->cpu_ases[asidx];
619 newas->cpu = cpu;
620 newas->as = as;
621 if (tcg_enabled()) {
622 newas->tcg_as_listener.commit = tcg_commit;
623 memory_listener_register(&newas->tcg_as_listener, as);
627 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
629 /* Return the AddressSpace corresponding to the specified index */
630 return cpu->cpu_ases[asidx].as;
632 #endif
634 void cpu_exec_unrealizefn(CPUState *cpu)
636 CPUClass *cc = CPU_GET_CLASS(cpu);
638 cpu_list_remove(cpu);
640 if (cc->vmsd != NULL) {
641 vmstate_unregister(NULL, cc->vmsd, cpu);
643 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
644 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
648 void cpu_exec_initfn(CPUState *cpu)
650 #ifdef TARGET_WORDS_BIGENDIAN
651 cpu->bigendian = true;
652 #else
653 cpu->bigendian = false;
654 #endif
655 cpu->as = NULL;
656 cpu->num_ases = 0;
658 #ifndef CONFIG_USER_ONLY
659 cpu->thread_id = qemu_get_thread_id();
661 /* This is a softmmu CPU object, so create a property for it
662 * so users can wire up its memory. (This can't go in qom/cpu.c
663 * because that file is compiled only once for both user-mode
664 * and system builds.) The default if no link is set up is to use
665 * the system address space.
667 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
668 (Object **)&cpu->memory,
669 qdev_prop_allow_set_link_before_realize,
670 OBJ_PROP_LINK_UNREF_ON_RELEASE,
671 &error_abort);
672 cpu->memory = system_memory;
673 object_ref(OBJECT(cpu->memory));
674 #endif
677 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
679 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
681 cpu_list_add(cpu);
683 #ifndef CONFIG_USER_ONLY
684 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
685 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
687 if (cc->vmsd != NULL) {
688 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
690 #endif
693 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
695 /* Flush the whole TB as this will not have race conditions
696 * even if we don't have proper locking yet.
697 * Ideally we would just invalidate the TBs for the
698 * specified PC.
700 tb_flush(cpu);
703 #if defined(CONFIG_USER_ONLY)
704 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
709 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
710 int flags)
712 return -ENOSYS;
715 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
719 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
720 int flags, CPUWatchpoint **watchpoint)
722 return -ENOSYS;
724 #else
725 /* Add a watchpoint. */
726 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
727 int flags, CPUWatchpoint **watchpoint)
729 CPUWatchpoint *wp;
731 /* forbid ranges which are empty or run off the end of the address space */
732 if (len == 0 || (addr + len - 1) < addr) {
733 error_report("tried to set invalid watchpoint at %"
734 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
735 return -EINVAL;
737 wp = g_malloc(sizeof(*wp));
739 wp->vaddr = addr;
740 wp->len = len;
741 wp->flags = flags;
743 /* keep all GDB-injected watchpoints in front */
744 if (flags & BP_GDB) {
745 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
746 } else {
747 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
750 tlb_flush_page(cpu, addr);
752 if (watchpoint)
753 *watchpoint = wp;
754 return 0;
757 /* Remove a specific watchpoint. */
758 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
759 int flags)
761 CPUWatchpoint *wp;
763 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
764 if (addr == wp->vaddr && len == wp->len
765 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
766 cpu_watchpoint_remove_by_ref(cpu, wp);
767 return 0;
770 return -ENOENT;
773 /* Remove a specific watchpoint by reference. */
774 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
776 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
778 tlb_flush_page(cpu, watchpoint->vaddr);
780 g_free(watchpoint);
783 /* Remove all matching watchpoints. */
784 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
786 CPUWatchpoint *wp, *next;
788 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
789 if (wp->flags & mask) {
790 cpu_watchpoint_remove_by_ref(cpu, wp);
795 /* Return true if this watchpoint address matches the specified
796 * access (ie the address range covered by the watchpoint overlaps
797 * partially or completely with the address range covered by the
798 * access).
800 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
801 vaddr addr,
802 vaddr len)
804 /* We know the lengths are non-zero, but a little caution is
805 * required to avoid errors in the case where the range ends
806 * exactly at the top of the address space and so addr + len
807 * wraps round to zero.
809 vaddr wpend = wp->vaddr + wp->len - 1;
810 vaddr addrend = addr + len - 1;
812 return !(addr > wpend || wp->vaddr > addrend);
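/* Example of the wrap-around case handled above: a 1-byte watchpoint on the
 * last byte of the address space would make wp->vaddr + wp->len wrap to 0,
 * but wpend = wp->vaddr + wp->len - 1 stays at the top of the range, so the
 * overlap test remains correct. */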
815 #endif
817 /* Add a breakpoint. */
818 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
819 CPUBreakpoint **breakpoint)
821 CPUBreakpoint *bp;
823 bp = g_malloc(sizeof(*bp));
825 bp->pc = pc;
826 bp->flags = flags;
828 /* keep all GDB-injected breakpoints in front */
829 if (flags & BP_GDB) {
830 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
831 } else {
832 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
835 breakpoint_invalidate(cpu, pc);
837 if (breakpoint) {
838 *breakpoint = bp;
840 return 0;
843 /* Remove a specific breakpoint. */
844 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
846 CPUBreakpoint *bp;
848 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
849 if (bp->pc == pc && bp->flags == flags) {
850 cpu_breakpoint_remove_by_ref(cpu, bp);
851 return 0;
854 return -ENOENT;
857 /* Remove a specific breakpoint by reference. */
858 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
860 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
862 breakpoint_invalidate(cpu, breakpoint->pc);
864 g_free(breakpoint);
867 /* Remove all matching breakpoints. */
868 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
870 CPUBreakpoint *bp, *next;
872 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
873 if (bp->flags & mask) {
874 cpu_breakpoint_remove_by_ref(cpu, bp);
879 /* enable or disable single step mode. EXCP_DEBUG is returned by the
880 CPU loop after each instruction */
881 void cpu_single_step(CPUState *cpu, int enabled)
883 if (cpu->singlestep_enabled != enabled) {
884 cpu->singlestep_enabled = enabled;
885 if (kvm_enabled()) {
886 kvm_update_guest_debug(cpu, 0);
887 } else {
888 /* must flush all the translated code to avoid inconsistencies */
889 /* XXX: only flush what is necessary */
890 tb_flush(cpu);
895 void QEMU_NORETURN cpu_abort(CPUState *cpu, const char *fmt, ...)
897 va_list ap;
898 va_list ap2;
900 va_start(ap, fmt);
901 va_copy(ap2, ap);
902 fprintf(stderr, "qemu: fatal: ");
903 vfprintf(stderr, fmt, ap);
904 fprintf(stderr, "\n");
905 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
906 if (qemu_log_separate()) {
907 qemu_log_lock();
908 qemu_log("qemu: fatal: ");
909 qemu_log_vprintf(fmt, ap2);
910 qemu_log("\n");
911 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
912 qemu_log_flush();
913 qemu_log_unlock();
914 qemu_log_close();
916 va_end(ap2);
917 va_end(ap);
918 replay_finish();
919 #if defined(CONFIG_USER_ONLY)
921 struct sigaction act;
922 sigfillset(&act.sa_mask);
923 act.sa_handler = SIG_DFL;
924 sigaction(SIGABRT, &act, NULL);
926 #endif
927 abort();
930 #if !defined(CONFIG_USER_ONLY)
931 /* Called from RCU critical section */
932 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
934 RAMBlock *block;
936 block = atomic_rcu_read(&ram_list.mru_block);
937 if (block && addr - block->offset < block->max_length) {
938 return block;
940 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
941 if (addr - block->offset < block->max_length) {
942 goto found;
946 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
947 abort();
949 found:
950 /* It is safe to write mru_block outside the iothread lock. This
951 * is what happens:
953 * mru_block = xxx
954 * rcu_read_unlock()
955 * xxx removed from list
956 * rcu_read_lock()
957 * read mru_block
958 * mru_block = NULL;
959 * call_rcu(reclaim_ramblock, xxx);
960 * rcu_read_unlock()
962 * atomic_rcu_set is not needed here. The block was already published
963 * when it was placed into the list. Here we're just making an extra
964 * copy of the pointer.
966 ram_list.mru_block = block;
967 return block;
970 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
972 CPUState *cpu;
973 ram_addr_t start1;
974 RAMBlock *block;
975 ram_addr_t end;
977 end = TARGET_PAGE_ALIGN(start + length);
978 start &= TARGET_PAGE_MASK;
980 rcu_read_lock();
981 block = qemu_get_ram_block(start);
982 assert(block == qemu_get_ram_block(end - 1));
983 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
984 CPU_FOREACH(cpu) {
985 tlb_reset_dirty(cpu, start1, length);
987 rcu_read_unlock();
990 /* Note: start and end must be within the same ram block. */
991 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
992 ram_addr_t length,
993 unsigned client)
995 DirtyMemoryBlocks *blocks;
996 unsigned long end, page;
997 bool dirty = false;
999 if (length == 0) {
1000 return false;
1003 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1004 page = start >> TARGET_PAGE_BITS;
1006 rcu_read_lock();
1008 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1010 while (page < end) {
1011 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1012 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1013 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1015 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1016 offset, num);
1017 page += num;
1020 rcu_read_unlock();
1022 if (dirty && tcg_enabled()) {
1023 tlb_reset_dirty_range_all(start, length);
1026 return dirty;
1029 /* Called from RCU critical section */
1030 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1031 MemoryRegionSection *section,
1032 target_ulong vaddr,
1033 hwaddr paddr, hwaddr xlat,
1034 int prot,
1035 target_ulong *address)
1037 hwaddr iotlb;
1038 CPUWatchpoint *wp;
1040 if (memory_region_is_ram(section->mr)) {
1041 /* Normal RAM. */
1042 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1043 if (!section->readonly) {
1044 iotlb |= PHYS_SECTION_NOTDIRTY;
1045 } else {
1046 iotlb |= PHYS_SECTION_ROM;
1048 } else {
1049 AddressSpaceDispatch *d;
1051 d = atomic_rcu_read(&section->address_space->dispatch);
1052 iotlb = section - d->map.sections;
1053 iotlb += xlat;
1056 /* Make accesses to pages with watchpoints go via the
1057 watchpoint trap routines. */
1058 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1059 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1060 /* Avoid trapping reads of pages with a write breakpoint. */
1061 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1062 iotlb = PHYS_SECTION_WATCH + paddr;
1063 *address |= TLB_MMIO;
1064 break;
1069 return iotlb;
1071 #endif /* defined(CONFIG_USER_ONLY) */
1073 #if !defined(CONFIG_USER_ONLY)
1075 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1076 uint16_t section);
1077 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1079 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1080 qemu_anon_ram_alloc;
1083  * Set a custom physical guest memory allocator.
1084 * Accelerators with unusual needs may need this. Hopefully, we can
1085 * get rid of it eventually.
1087 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1089 phys_mem_alloc = alloc;
1092 static uint16_t phys_section_add(PhysPageMap *map,
1093 MemoryRegionSection *section)
1095 /* The physical section number is ORed with a page-aligned
1096 * pointer to produce the iotlb entries. Thus it should
1097 * never overflow into the page-aligned value.
1099 assert(map->sections_nb < TARGET_PAGE_SIZE);
1101 if (map->sections_nb == map->sections_nb_alloc) {
1102 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1103 map->sections = g_renew(MemoryRegionSection, map->sections,
1104 map->sections_nb_alloc);
1106 map->sections[map->sections_nb] = *section;
1107 memory_region_ref(section->mr);
1108 return map->sections_nb++;
1111 static void phys_section_destroy(MemoryRegion *mr)
1113 bool have_sub_page = mr->subpage;
1115 memory_region_unref(mr);
1117 if (have_sub_page) {
1118 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1119 object_unref(OBJECT(&subpage->iomem));
1120 g_free(subpage);
1124 static void phys_sections_free(PhysPageMap *map)
1126 while (map->sections_nb > 0) {
1127 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1128 phys_section_destroy(section->mr);
1130 g_free(map->sections);
1131 g_free(map->nodes);
1134 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1136 subpage_t *subpage;
1137 hwaddr base = section->offset_within_address_space
1138 & TARGET_PAGE_MASK;
1139 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1140 d->map.nodes, d->map.sections);
1141 MemoryRegionSection subsection = {
1142 .offset_within_address_space = base,
1143 .size = int128_make64(TARGET_PAGE_SIZE),
1145 hwaddr start, end;
1147 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1149 if (!(existing->mr->subpage)) {
1150 subpage = subpage_init(d->as, base);
1151 subsection.address_space = d->as;
1152 subsection.mr = &subpage->iomem;
1153 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1154 phys_section_add(&d->map, &subsection));
1155 } else {
1156 subpage = container_of(existing->mr, subpage_t, iomem);
1158 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1159 end = start + int128_get64(section->size) - 1;
1160 subpage_register(subpage, start, end,
1161 phys_section_add(&d->map, section));
1165 static void register_multipage(AddressSpaceDispatch *d,
1166 MemoryRegionSection *section)
1168 hwaddr start_addr = section->offset_within_address_space;
1169 uint16_t section_index = phys_section_add(&d->map, section);
1170 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1171 TARGET_PAGE_BITS));
1173 assert(num_pages);
1174 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1177 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1179 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1180 AddressSpaceDispatch *d = as->next_dispatch;
1181 MemoryRegionSection now = *section, remain = *section;
1182 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1184 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1185 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1186 - now.offset_within_address_space;
1188 now.size = int128_min(int128_make64(left), now.size);
1189 register_subpage(d, &now);
1190 } else {
1191 now.size = int128_zero();
1193 while (int128_ne(remain.size, now.size)) {
1194 remain.size = int128_sub(remain.size, now.size);
1195 remain.offset_within_address_space += int128_get64(now.size);
1196 remain.offset_within_region += int128_get64(now.size);
1197 now = remain;
1198 if (int128_lt(remain.size, page_size)) {
1199 register_subpage(d, &now);
1200 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1201 now.size = page_size;
1202 register_subpage(d, &now);
1203 } else {
1204 now.size = int128_and(now.size, int128_neg(page_size));
1205 register_multipage(d, &now);
1210 void qemu_flush_coalesced_mmio_buffer(void)
1212 if (kvm_enabled())
1213 kvm_flush_coalesced_mmio_buffer();
1216 void qemu_mutex_lock_ramlist(void)
1218 qemu_mutex_lock(&ram_list.mutex);
1221 void qemu_mutex_unlock_ramlist(void)
1223 qemu_mutex_unlock(&ram_list.mutex);
1226 #ifdef __linux__
1227 static int64_t get_file_size(int fd)
1229 int64_t size = lseek(fd, 0, SEEK_END);
1230 if (size < 0) {
1231 return -errno;
1233 return size;
1236 static void *file_ram_alloc(RAMBlock *block,
1237 ram_addr_t memory,
1238 const char *path,
1239 Error **errp)
1241 bool unlink_on_error = false;
1242 char *filename;
1243 char *sanitized_name;
1244 char *c;
1245 void * volatile area = MAP_FAILED;
1246 int fd = -1;
1247 int64_t file_size;
1249 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1250 error_setg(errp,
1251 "host lacks kvm mmu notifiers, -mem-path unsupported");
1252 return NULL;
1255 for (;;) {
1256 fd = open(path, O_RDWR);
1257 if (fd >= 0) {
1258 /* @path names an existing file, use it */
1259 break;
1261 if (errno == ENOENT) {
1262 /* @path names a file that doesn't exist, create it */
1263 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1264 if (fd >= 0) {
1265 unlink_on_error = true;
1266 break;
1268 } else if (errno == EISDIR) {
1269 /* @path names a directory, create a file there */
1270 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1271 sanitized_name = g_strdup(memory_region_name(block->mr));
1272 for (c = sanitized_name; *c != '\0'; c++) {
1273 if (*c == '/') {
1274 *c = '_';
1278 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1279 sanitized_name);
1280 g_free(sanitized_name);
1282 fd = mkstemp(filename);
1283 if (fd >= 0) {
1284 unlink(filename);
1285 g_free(filename);
1286 break;
1288 g_free(filename);
1290 if (errno != EEXIST && errno != EINTR) {
1291 error_setg_errno(errp, errno,
1292 "can't open backing store %s for guest RAM",
1293 path);
1294 goto error;
1297 * Try again on EINTR and EEXIST. The latter happens when
1298 * something else creates the file between our two open().
1302 block->page_size = qemu_fd_getpagesize(fd);
1303 block->mr->align = block->page_size;
1304 #if defined(__s390x__)
1305 if (kvm_enabled()) {
1306 block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1308 #endif
1310 file_size = get_file_size(fd);
1312 if (memory < block->page_size) {
1313 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1314 "or larger than page size 0x%zx",
1315 memory, block->page_size);
1316 goto error;
1319 if (file_size > 0 && file_size < memory) {
1320 error_setg(errp, "backing store %s size 0x%" PRIx64
1321 " does not match 'size' option 0x" RAM_ADDR_FMT,
1322 path, file_size, memory);
1323 goto error;
1326 memory = ROUND_UP(memory, block->page_size);
1329 * ftruncate is not supported by hugetlbfs in older
1330 * hosts, so don't bother bailing out on errors.
1331 * If anything goes wrong with it under other filesystems,
1332 * mmap will fail.
1334 * Do not truncate the non-empty backend file to avoid corrupting
1335 * the existing data in the file. Disabling shrinking is not
1336 * enough. For example, the current vNVDIMM implementation stores
1337 * the guest NVDIMM labels at the end of the backend file. If the
1338 * backend file is later extended, QEMU will not be able to find
1339 * those labels. Therefore, extending the non-empty backend file
1340 * is disabled as well.
1342 if (!file_size && ftruncate(fd, memory)) {
1343 perror("ftruncate");
1346 area = qemu_ram_mmap(fd, memory, block->mr->align,
1347 block->flags & RAM_SHARED);
1348 if (area == MAP_FAILED) {
1349 error_setg_errno(errp, errno,
1350 "unable to map backing store for guest RAM");
1351 goto error;
1354 if (mem_prealloc) {
1355 os_mem_prealloc(fd, area, memory, errp);
1356 if (errp && *errp) {
1357 goto error;
1361 block->fd = fd;
1362 return area;
1364 error:
1365 if (area != MAP_FAILED) {
1366 qemu_ram_munmap(area, memory);
1368 if (unlink_on_error) {
1369 unlink(path);
1371 if (fd != -1) {
1372 close(fd);
1374 return NULL;
1376 #endif
1378 /* Called with the ramlist lock held. */
1379 static ram_addr_t find_ram_offset(ram_addr_t size)
1381 RAMBlock *block, *next_block;
1382 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1384 assert(size != 0); /* it would hand out same offset multiple times */
1386 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1387 return 0;
1390 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1391 ram_addr_t end, next = RAM_ADDR_MAX;
1393 end = block->offset + block->max_length;
1395 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1396 if (next_block->offset >= end) {
1397 next = MIN(next, next_block->offset);
1400 if (next - end >= size && next - end < mingap) {
1401 offset = end;
1402 mingap = next - end;
1406 if (offset == RAM_ADDR_MAX) {
1407 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1408 (uint64_t)size);
1409 abort();
1412 return offset;
1415 ram_addr_t last_ram_offset(void)
1417 RAMBlock *block;
1418 ram_addr_t last = 0;
1420 rcu_read_lock();
1421 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1422 last = MAX(last, block->offset + block->max_length);
1424 rcu_read_unlock();
1425 return last;
1428 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1430 int ret;
1432     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1433 if (!machine_dump_guest_core(current_machine)) {
1434 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1435 if (ret) {
1436 perror("qemu_madvise");
1437 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1438 "but dump_guest_core=off specified\n");
1443 const char *qemu_ram_get_idstr(RAMBlock *rb)
1445 return rb->idstr;
1448 /* Called with iothread lock held. */
1449 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1451 RAMBlock *block;
1453 assert(new_block);
1454 assert(!new_block->idstr[0]);
1456 if (dev) {
1457 char *id = qdev_get_dev_path(dev);
1458 if (id) {
1459 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1460 g_free(id);
1463 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1465 rcu_read_lock();
1466 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1467 if (block != new_block &&
1468 !strcmp(block->idstr, new_block->idstr)) {
1469 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1470 new_block->idstr);
1471 abort();
1474 rcu_read_unlock();
1477 /* Called with iothread lock held. */
1478 void qemu_ram_unset_idstr(RAMBlock *block)
1480 /* FIXME: arch_init.c assumes that this is not called throughout
1481 * migration. Ignore the problem since hot-unplug during migration
1482 * does not work anyway.
1484 if (block) {
1485 memset(block->idstr, 0, sizeof(block->idstr));
1489 size_t qemu_ram_pagesize(RAMBlock *rb)
1491 return rb->page_size;
1494 static int memory_try_enable_merging(void *addr, size_t len)
1496 if (!machine_mem_merge(current_machine)) {
1497 /* disabled by the user */
1498 return 0;
1501 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1504 /* Only legal before guest might have detected the memory size: e.g. on
1505 * incoming migration, or right after reset.
1507  * As the memory core doesn't know how memory is accessed, it is up to the
1508 * resize callback to update device state and/or add assertions to detect
1509 * misuse, if necessary.
1511 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1513 assert(block);
1515 newsize = HOST_PAGE_ALIGN(newsize);
1517 if (block->used_length == newsize) {
1518 return 0;
1521 if (!(block->flags & RAM_RESIZEABLE)) {
1522 error_setg_errno(errp, EINVAL,
1523 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1524 " in != 0x" RAM_ADDR_FMT, block->idstr,
1525 newsize, block->used_length);
1526 return -EINVAL;
1529 if (block->max_length < newsize) {
1530 error_setg_errno(errp, EINVAL,
1531 "Length too large: %s: 0x" RAM_ADDR_FMT
1532 " > 0x" RAM_ADDR_FMT, block->idstr,
1533 newsize, block->max_length);
1534 return -EINVAL;
1537 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1538 block->used_length = newsize;
1539 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1540 DIRTY_CLIENTS_ALL);
1541 memory_region_set_size(block->mr, newsize);
1542 if (block->resized) {
1543 block->resized(block->idstr, newsize, block->host);
1545 return 0;
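/* dirty_memory_extend() below never modifies the bitmaps that the old
 * DirtyMemoryBlocks points to: it copies the existing block pointers into a
 * larger container, allocates fresh bitmaps only for the new indices,
 * publishes the result with atomic_rcu_set() and frees the old container
 * after a grace period via g_free_rcu(), so concurrent RCU readers always
 * see a consistent array. */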
1548 /* Called with ram_list.mutex held */
1549 static void dirty_memory_extend(ram_addr_t old_ram_size,
1550 ram_addr_t new_ram_size)
1552 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1553 DIRTY_MEMORY_BLOCK_SIZE);
1554 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1555 DIRTY_MEMORY_BLOCK_SIZE);
1556 int i;
1558 /* Only need to extend if block count increased */
1559 if (new_num_blocks <= old_num_blocks) {
1560 return;
1563 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1564 DirtyMemoryBlocks *old_blocks;
1565 DirtyMemoryBlocks *new_blocks;
1566 int j;
1568 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1569 new_blocks = g_malloc(sizeof(*new_blocks) +
1570 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1572 if (old_num_blocks) {
1573 memcpy(new_blocks->blocks, old_blocks->blocks,
1574 old_num_blocks * sizeof(old_blocks->blocks[0]));
1577 for (j = old_num_blocks; j < new_num_blocks; j++) {
1578 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1581 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1583 if (old_blocks) {
1584 g_free_rcu(old_blocks, rcu);
1589 static void ram_block_add(RAMBlock *new_block, Error **errp)
1591 RAMBlock *block;
1592 RAMBlock *last_block = NULL;
1593 ram_addr_t old_ram_size, new_ram_size;
1594 Error *err = NULL;
1596 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1598 qemu_mutex_lock_ramlist();
1599 new_block->offset = find_ram_offset(new_block->max_length);
1601 if (!new_block->host) {
1602 if (xen_enabled()) {
1603 xen_ram_alloc(new_block->offset, new_block->max_length,
1604 new_block->mr, &err);
1605 if (err) {
1606 error_propagate(errp, err);
1607 qemu_mutex_unlock_ramlist();
1608 return;
1610 } else {
1611 new_block->host = phys_mem_alloc(new_block->max_length,
1612 &new_block->mr->align);
1614              * In HAX, QEMU allocates the virtual address space, and the HAX kernel
1615              * module populates it with physical memory. Currently there is no paging,
1616              * so the user should make sure enough free memory is available in advance.
1618 if (hax_enabled()) {
1619 int ret;
1620 ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host,
1621 new_block->max_length);
1622 if (ret < 0) {
1623 error_setg(errp, "Hax failed to populate ram");
1624 return;
1628 if (!new_block->host) {
1629 error_setg_errno(errp, errno,
1630 "cannot set up guest memory '%s'",
1631 memory_region_name(new_block->mr));
1632 qemu_mutex_unlock_ramlist();
1633 return;
1636          * In HAX, QEMU allocates the virtual address space, and the HAX kernel
1637          * module populates it with physical memory. Currently there is no paging,
1638          * so the user should make sure enough free memory is available in advance.
1640 if (hax_enabled()) {
1641 int ret;
1642 ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host,
1643 new_block->max_length);
1644 if (ret < 0) {
1645 error_setg(errp, "Hax failed to populate ram");
1646 return;
1650 memory_try_enable_merging(new_block->host, new_block->max_length);
1654 new_ram_size = MAX(old_ram_size,
1655 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1656 if (new_ram_size > old_ram_size) {
1657 migration_bitmap_extend(old_ram_size, new_ram_size);
1658 dirty_memory_extend(old_ram_size, new_ram_size);
1660 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1661 * QLIST (which has an RCU-friendly variant) does not have insertion at
1662 * tail, so save the last element in last_block.
1664 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1665 last_block = block;
1666 if (block->max_length < new_block->max_length) {
1667 break;
1670 if (block) {
1671 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1672 } else if (last_block) {
1673 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1674 } else { /* list is empty */
1675 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1677 ram_list.mru_block = NULL;
1679 /* Write list before version */
1680 smp_wmb();
1681 ram_list.version++;
1682 qemu_mutex_unlock_ramlist();
1684 cpu_physical_memory_set_dirty_range(new_block->offset,
1685 new_block->used_length,
1686 DIRTY_CLIENTS_ALL);
1688 if (new_block->host) {
1689 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1690 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1691 /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1692 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1696 #ifdef __linux__
1697 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1698 bool share, const char *mem_path,
1699 Error **errp)
1701 RAMBlock *new_block;
1702 Error *local_err = NULL;
1704 if (xen_enabled()) {
1705 error_setg(errp, "-mem-path not supported with Xen");
1706 return NULL;
1709 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1711 * file_ram_alloc() needs to allocate just like
1712 * phys_mem_alloc, but we haven't bothered to provide
1713 * a hook there.
1715 error_setg(errp,
1716 "-mem-path not supported with this accelerator");
1717 return NULL;
1720 size = HOST_PAGE_ALIGN(size);
1721 new_block = g_malloc0(sizeof(*new_block));
1722 new_block->mr = mr;
1723 new_block->used_length = size;
1724 new_block->max_length = size;
1725 new_block->flags = share ? RAM_SHARED : 0;
1726 new_block->host = file_ram_alloc(new_block, size,
1727 mem_path, errp);
1728 if (!new_block->host) {
1729 g_free(new_block);
1730 return NULL;
1733 ram_block_add(new_block, &local_err);
1734 if (local_err) {
1735 g_free(new_block);
1736 error_propagate(errp, local_err);
1737 return NULL;
1739 return new_block;
1741 #endif
1743 static
1744 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1745 void (*resized)(const char*,
1746 uint64_t length,
1747 void *host),
1748 void *host, bool resizeable,
1749 MemoryRegion *mr, Error **errp)
1751 RAMBlock *new_block;
1752 Error *local_err = NULL;
1754 size = HOST_PAGE_ALIGN(size);
1755 max_size = HOST_PAGE_ALIGN(max_size);
1756 new_block = g_malloc0(sizeof(*new_block));
1757 new_block->mr = mr;
1758 new_block->resized = resized;
1759 new_block->used_length = size;
1760 new_block->max_length = max_size;
1761 assert(max_size >= size);
1762 new_block->fd = -1;
1763 new_block->page_size = getpagesize();
1764 new_block->host = host;
1765 if (host) {
1766 new_block->flags |= RAM_PREALLOC;
1768 if (resizeable) {
1769 new_block->flags |= RAM_RESIZEABLE;
1771 ram_block_add(new_block, &local_err);
1772 if (local_err) {
1773 g_free(new_block);
1774 error_propagate(errp, local_err);
1775 return NULL;
1777 return new_block;
1780 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1781 MemoryRegion *mr, Error **errp)
1783 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1786 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1788 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1791 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1792 void (*resized)(const char*,
1793 uint64_t length,
1794 void *host),
1795 MemoryRegion *mr, Error **errp)
1797 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
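/* The three public wrappers above all funnel into qemu_ram_alloc_internal():
 * qemu_ram_alloc_from_ptr() passes a host pointer, so the block is marked
 * RAM_PREALLOC and its memory is not freed by reclaim_ramblock();
 * qemu_ram_alloc() lets phys_mem_alloc provide the memory; and
 * qemu_ram_alloc_resizeable() sets RAM_RESIZEABLE so qemu_ram_resize() may
 * later grow used_length up to max_length. */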
1800 static void reclaim_ramblock(RAMBlock *block)
1802 if (block->flags & RAM_PREALLOC) {
1804 } else if (xen_enabled()) {
1805 xen_invalidate_map_cache_entry(block->host);
1806 #ifndef _WIN32
1807 } else if (block->fd >= 0) {
1808 qemu_ram_munmap(block->host, block->max_length);
1809 close(block->fd);
1810 #endif
1811 } else {
1812 qemu_anon_ram_free(block->host, block->max_length);
1814 g_free(block);
1817 void qemu_ram_free(RAMBlock *block)
1819 if (!block) {
1820 return;
1823 qemu_mutex_lock_ramlist();
1824 QLIST_REMOVE_RCU(block, next);
1825 ram_list.mru_block = NULL;
1826 /* Write list before version */
1827 smp_wmb();
1828 ram_list.version++;
1829 call_rcu(block, reclaim_ramblock, rcu);
1830 qemu_mutex_unlock_ramlist();
1833 #ifndef _WIN32
1834 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1836 RAMBlock *block;
1837 ram_addr_t offset;
1838 int flags;
1839 void *area, *vaddr;
1841 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1842 offset = addr - block->offset;
1843 if (offset < block->max_length) {
1844 vaddr = ramblock_ptr(block, offset);
1845 if (block->flags & RAM_PREALLOC) {
1847 } else if (xen_enabled()) {
1848 abort();
1849 } else {
1850 flags = MAP_FIXED;
1851 if (block->fd >= 0) {
1852 flags |= (block->flags & RAM_SHARED ?
1853 MAP_SHARED : MAP_PRIVATE);
1854 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1855 flags, block->fd, offset);
1856 } else {
1858 * Remap needs to match alloc. Accelerators that
1859 * set phys_mem_alloc never remap. If they did,
1860 * we'd need a remap hook here.
1862 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1864 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1865 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1866 flags, -1, 0);
1868 if (area != vaddr) {
1869 fprintf(stderr, "Could not remap addr: "
1870 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1871 length, addr);
1872 exit(1);
1874 memory_try_enable_merging(vaddr, length);
1875 qemu_ram_setup_dump(vaddr, length);
1880 #endif /* !_WIN32 */
1882 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1883 * This should not be used for general purpose DMA. Use address_space_map
1884 * or address_space_rw instead. For local memory (e.g. video ram) that the
1885 * device owns, use memory_region_get_ram_ptr.
1887 * Called within RCU critical section.
1889 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1891 RAMBlock *block = ram_block;
1893 if (block == NULL) {
1894 block = qemu_get_ram_block(addr);
1895 addr -= block->offset;
1898 if (xen_enabled() && block->host == NULL) {
1899 /* We need to check if the requested address is in the RAM
1900 * because we don't want to map the entire memory in QEMU.
1901 * In that case just map until the end of the page.
1903 if (block->offset == 0) {
1904 return xen_map_cache(addr, 0, 0);
1907 block->host = xen_map_cache(block->offset, block->max_length, 1);
1909 return ramblock_ptr(block, addr);
1912 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1913 * but takes a size argument.
1915 * Called within RCU critical section.
1917 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1918 hwaddr *size)
1920 RAMBlock *block = ram_block;
1921 if (*size == 0) {
1922 return NULL;
1925 if (block == NULL) {
1926 block = qemu_get_ram_block(addr);
1927 addr -= block->offset;
1929 *size = MIN(*size, block->max_length - addr);
1931 if (xen_enabled() && block->host == NULL) {
1932 /* We need to check if the requested address is in the RAM
1933 * because we don't want to map the entire memory in QEMU.
1934 * In that case just map the requested area.
1936 if (block->offset == 0) {
1937 return xen_map_cache(addr, *size, 1);
1940 block->host = xen_map_cache(block->offset, block->max_length, 1);
1943 return ramblock_ptr(block, addr);
1947  * Translates a host ptr back to a RAMBlock and an offset
1948  * in that RAMBlock.
1950  * ptr: Host pointer to look up
1951  * round_offset: If true round the result offset down to a page boundary
1953  * *offset: set to result offset within the RAMBlock
1955 * Returns: RAMBlock (or NULL if not found)
1957 * By the time this function returns, the returned pointer is not protected
1958 * by RCU anymore. If the caller is not within an RCU critical section and
1959 * does not hold the iothread lock, it must have other means of protecting the
1960 * pointer, such as a reference to the region that includes the incoming
1961 * ram_addr_t.
1963 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1964 ram_addr_t *offset)
1966 RAMBlock *block;
1967 uint8_t *host = ptr;
1969 if (xen_enabled()) {
1970 ram_addr_t ram_addr;
1971 rcu_read_lock();
1972 ram_addr = xen_ram_addr_from_mapcache(ptr);
1973 block = qemu_get_ram_block(ram_addr);
1974 if (block) {
1975 *offset = ram_addr - block->offset;
1977 rcu_read_unlock();
1978 return block;
1981 rcu_read_lock();
1982 block = atomic_rcu_read(&ram_list.mru_block);
1983 if (block && block->host && host - block->host < block->max_length) {
1984 goto found;
1987 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1988         /* This can happen when the block is not mapped. */
1989 if (block->host == NULL) {
1990 continue;
1992 if (host - block->host < block->max_length) {
1993 goto found;
1997 rcu_read_unlock();
1998 return NULL;
2000 found:
2001 *offset = (host - block->host);
2002 if (round_offset) {
2003 *offset &= TARGET_PAGE_MASK;
2005 rcu_read_unlock();
2006 return block;
2010 * Finds the named RAMBlock
2012 * name: The name of RAMBlock to find
2014 * Returns: RAMBlock (or NULL if not found)
2016 RAMBlock *qemu_ram_block_by_name(const char *name)
2018 RAMBlock *block;
2020 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2021 if (!strcmp(name, block->idstr)) {
2022 return block;
2026 return NULL;
2029 /* Some of the softmmu routines need to translate from a host pointer
2030 (typically a TLB entry) back to a ram offset. */
2031 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2033 RAMBlock *block;
2034 ram_addr_t offset;
2036 block = qemu_ram_block_from_host(ptr, false, &offset);
2037 if (!block) {
2038 return RAM_ADDR_INVALID;
2041 return block->offset + offset;
2044 /* Called within RCU critical section. */
2045 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2046 uint64_t val, unsigned size)
2048 bool locked = false;
2050 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2051 locked = true;
2052 tb_lock();
2053 tb_invalidate_phys_page_fast(ram_addr, size);
2055 switch (size) {
2056 case 1:
2057 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2058 break;
2059 case 2:
2060 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2061 break;
2062 case 4:
2063 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2064 break;
2065 default:
2066 abort();
2069 if (locked) {
2070 tb_unlock();
2073 /* Set both VGA and migration bits for simplicity and to remove
2074 * the notdirty callback faster.
2076 cpu_physical_memory_set_dirty_range(ram_addr, size,
2077 DIRTY_CLIENTS_NOCODE);
2078 /* we remove the notdirty callback only if the code has been
2079 flushed */
2080 if (!cpu_physical_memory_is_clean(ram_addr)) {
2081 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2085 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2086 unsigned size, bool is_write)
2088 return is_write;
2091 static const MemoryRegionOps notdirty_mem_ops = {
2092 .write = notdirty_mem_write,
2093 .valid.accepts = notdirty_mem_accepts,
2094 .endianness = DEVICE_NATIVE_ENDIAN,
2097 /* Generate a debug exception if a watchpoint has been hit. */
2098 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2100 CPUState *cpu = current_cpu;
2101 CPUClass *cc = CPU_GET_CLASS(cpu);
2102 CPUArchState *env = cpu->env_ptr;
2103 target_ulong pc, cs_base;
2104 target_ulong vaddr;
2105 CPUWatchpoint *wp;
2106 uint32_t cpu_flags;
2108 if (cpu->watchpoint_hit) {
2109 /* We re-entered the check after replacing the TB. Now raise
2110          * the debug interrupt so that it will trigger after the
2111 * current instruction. */
2112 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2113 return;
2115 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2116 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2117 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2118 && (wp->flags & flags)) {
2119 if (flags == BP_MEM_READ) {
2120 wp->flags |= BP_WATCHPOINT_HIT_READ;
2121 } else {
2122 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2124 wp->hitaddr = vaddr;
2125 wp->hitattrs = attrs;
2126 if (!cpu->watchpoint_hit) {
2127 if (wp->flags & BP_CPU &&
2128 !cc->debug_check_watchpoint(cpu, wp)) {
2129 wp->flags &= ~BP_WATCHPOINT_HIT;
2130 continue;
2132 cpu->watchpoint_hit = wp;
2134 /* The tb_lock will be reset when cpu_loop_exit or
2135 * cpu_loop_exit_noexc longjmp back into the cpu_exec
2136 * main loop.
2138 tb_lock();
2139 tb_check_watchpoint(cpu);
2140 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2141 cpu->exception_index = EXCP_DEBUG;
2142 cpu_loop_exit(cpu);
2143 } else {
2144 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2145 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2146 cpu_loop_exit_noexc(cpu);
2149 } else {
2150 wp->flags &= ~BP_WATCHPOINT_HIT;
2155 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2156 so these check for a hit then pass through to the normal out-of-line
2157 phys routines. */
2158 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2159 unsigned size, MemTxAttrs attrs)
2161 MemTxResult res;
2162 uint64_t data;
2163 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2164 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2166 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2167 switch (size) {
2168 case 1:
2169 data = address_space_ldub(as, addr, attrs, &res);
2170 break;
2171 case 2:
2172 data = address_space_lduw(as, addr, attrs, &res);
2173 break;
2174 case 4:
2175 data = address_space_ldl(as, addr, attrs, &res);
2176 break;
2177 default: abort();
2179 *pdata = data;
2180 return res;
2183 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2184 uint64_t val, unsigned size,
2185 MemTxAttrs attrs)
2187 MemTxResult res;
2188 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2189 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2191 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2192 switch (size) {
2193 case 1:
2194 address_space_stb(as, addr, val, attrs, &res);
2195 break;
2196 case 2:
2197 address_space_stw(as, addr, val, attrs, &res);
2198 break;
2199 case 4:
2200 address_space_stl(as, addr, val, attrs, &res);
2201 break;
2202 default: abort();
2204 return res;
2207 static const MemoryRegionOps watch_mem_ops = {
2208 .read_with_attrs = watch_mem_read,
2209 .write_with_attrs = watch_mem_write,
2210 .endianness = DEVICE_NATIVE_ENDIAN,
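/*
 * Illustrative sketch (assumption, not code from this file): the watch_mem_*
 * handlers above are only reached once a watchpoint covering the accessed
 * address has been registered, e.g. with cpu_watchpoint_insert(). "cpu" and
 * "guest_vaddr" are placeholders supplied by the caller.
 *
 *     CPUWatchpoint *wp;
 *     if (cpu_watchpoint_insert(cpu, guest_vaddr, 4,
 *                               BP_MEM_WRITE | BP_GDB, &wp) == 0) {
 *         // guest writes to [guest_vaddr, guest_vaddr + 4) now trap
 *         // through watch_mem_write() and check_watchpoint()
 *     }
 */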
2213 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2214 unsigned len, MemTxAttrs attrs)
2216 subpage_t *subpage = opaque;
2217 uint8_t buf[8];
2218 MemTxResult res;
2220 #if defined(DEBUG_SUBPAGE)
2221 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2222 subpage, len, addr);
2223 #endif
2224 res = address_space_read(subpage->as, addr + subpage->base,
2225 attrs, buf, len);
2226 if (res) {
2227 return res;
2229 switch (len) {
2230 case 1:
2231 *data = ldub_p(buf);
2232 return MEMTX_OK;
2233 case 2:
2234 *data = lduw_p(buf);
2235 return MEMTX_OK;
2236 case 4:
2237 *data = ldl_p(buf);
2238 return MEMTX_OK;
2239 case 8:
2240 *data = ldq_p(buf);
2241 return MEMTX_OK;
2242 default:
2243 abort();
2247 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2248 uint64_t value, unsigned len, MemTxAttrs attrs)
2250 subpage_t *subpage = opaque;
2251 uint8_t buf[8];
2253 #if defined(DEBUG_SUBPAGE)
2254 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2255 " value %"PRIx64"\n",
2256 __func__, subpage, len, addr, value);
2257 #endif
2258 switch (len) {
2259 case 1:
2260 stb_p(buf, value);
2261 break;
2262 case 2:
2263 stw_p(buf, value);
2264 break;
2265 case 4:
2266 stl_p(buf, value);
2267 break;
2268 case 8:
2269 stq_p(buf, value);
2270 break;
2271 default:
2272 abort();
2274 return address_space_write(subpage->as, addr + subpage->base,
2275 attrs, buf, len);
2278 static bool subpage_accepts(void *opaque, hwaddr addr,
2279 unsigned len, bool is_write)
2281 subpage_t *subpage = opaque;
2282 #if defined(DEBUG_SUBPAGE)
2283 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2284 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2285 #endif
2287 return address_space_access_valid(subpage->as, addr + subpage->base,
2288 len, is_write);
2291 static const MemoryRegionOps subpage_ops = {
2292 .read_with_attrs = subpage_read,
2293 .write_with_attrs = subpage_write,
2294 .impl.min_access_size = 1,
2295 .impl.max_access_size = 8,
2296 .valid.min_access_size = 1,
2297 .valid.max_access_size = 8,
2298 .valid.accepts = subpage_accepts,
2299 .endianness = DEVICE_NATIVE_ENDIAN,
2302 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2303 uint16_t section)
2305 int idx, eidx;
2307 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2308 return -1;
2309 idx = SUBPAGE_IDX(start);
2310 eidx = SUBPAGE_IDX(end);
2311 #if defined(DEBUG_SUBPAGE)
2312 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2313 __func__, mmio, start, end, idx, eidx, section);
2314 #endif
2315 for (; idx <= eidx; idx++) {
2316 mmio->sub_section[idx] = section;
2319 return 0;
2322 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2324 subpage_t *mmio;
2326 mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2327 mmio->as = as;
2328 mmio->base = base;
2329 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2330 NULL, TARGET_PAGE_SIZE);
2331 mmio->iomem.subpage = true;
2332 #if defined(DEBUG_SUBPAGE)
2333 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2334 mmio, base, TARGET_PAGE_SIZE);
2335 #endif
2336 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2338 return mmio;
2341 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2342 MemoryRegion *mr)
2344 assert(as);
2345 MemoryRegionSection section = {
2346 .address_space = as,
2347 .mr = mr,
2348 .offset_within_address_space = 0,
2349 .offset_within_region = 0,
2350 .size = int128_2_64(),
2353 return phys_section_add(map, &section);
2356 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2358 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2359 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2360 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2361 MemoryRegionSection *sections = d->map.sections;
2363 return sections[index & ~TARGET_PAGE_MASK].mr;
2366 static void io_mem_init(void)
2368 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2369 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2370 NULL, UINT64_MAX);
2371 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2372 NULL, UINT64_MAX);
2373 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2374 NULL, UINT64_MAX);
2377 static void mem_begin(MemoryListener *listener)
2379 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2380 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2381 uint16_t n;
2383 n = dummy_section(&d->map, as, &io_mem_unassigned);
2384 assert(n == PHYS_SECTION_UNASSIGNED);
2385 n = dummy_section(&d->map, as, &io_mem_notdirty);
2386 assert(n == PHYS_SECTION_NOTDIRTY);
2387 n = dummy_section(&d->map, as, &io_mem_rom);
2388 assert(n == PHYS_SECTION_ROM);
2389 n = dummy_section(&d->map, as, &io_mem_watch);
2390 assert(n == PHYS_SECTION_WATCH);
2392 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2393 d->as = as;
2394 as->next_dispatch = d;
2397 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2399 phys_sections_free(&d->map);
2400 g_free(d);
2403 static void mem_commit(MemoryListener *listener)
2405 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2406 AddressSpaceDispatch *cur = as->dispatch;
2407 AddressSpaceDispatch *next = as->next_dispatch;
2409 phys_page_compact_all(next, next->map.nodes_nb);
2411 atomic_rcu_set(&as->dispatch, next);
2412 if (cur) {
2413 call_rcu(cur, address_space_dispatch_free, rcu);
2417 static void tcg_commit(MemoryListener *listener)
2419 CPUAddressSpace *cpuas;
2420 AddressSpaceDispatch *d;
2422 /* since each CPU stores ram addresses in its TLB cache, we must
2423 reset the modified entries */
2424 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2425 cpu_reloading_memory_map();
2426 /* The CPU and TLB are protected by the iothread lock.
2427 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2428 * may have split the RCU critical section. */
2430 d = atomic_rcu_read(&cpuas->as->dispatch);
2431 atomic_rcu_set(&cpuas->memory_dispatch, d);
2432 tlb_flush(cpuas->cpu, 1);
2435 void address_space_init_dispatch(AddressSpace *as)
2437 as->dispatch = NULL;
2438 as->dispatch_listener = (MemoryListener) {
2439 .begin = mem_begin,
2440 .commit = mem_commit,
2441 .region_add = mem_add,
2442 .region_nop = mem_add,
2443 .priority = 0,
2445 memory_listener_register(&as->dispatch_listener, as);
2448 void address_space_unregister(AddressSpace *as)
2450 memory_listener_unregister(&as->dispatch_listener);
2453 void address_space_destroy_dispatch(AddressSpace *as)
2455 AddressSpaceDispatch *d = as->dispatch;
2457 atomic_rcu_set(&as->dispatch, NULL);
2458 if (d) {
2459 call_rcu(d, address_space_dispatch_free, rcu);
2463 static void memory_map_init(void)
2465 system_memory = g_malloc(sizeof(*system_memory));
2467 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2468 address_space_init(&address_space_memory, system_memory, "memory");
2470 system_io = g_malloc(sizeof(*system_io));
2471 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2472 65536);
2473 address_space_init(&address_space_io, system_io, "I/O");
2476 MemoryRegion *get_system_memory(void)
2478 return system_memory;
2481 MemoryRegion *get_system_io(void)
2483 return system_io;
2486 #endif /* !defined(CONFIG_USER_ONLY) */
2488 /* physical memory access (slow version, mainly for debug) */
2489 #if defined(CONFIG_USER_ONLY)
2490 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2491 uint8_t *buf, int len, int is_write)
2493 int l, flags;
2494 target_ulong page;
2495 void * p;
2497 while (len > 0) {
2498 page = addr & TARGET_PAGE_MASK;
2499 l = (page + TARGET_PAGE_SIZE) - addr;
2500 if (l > len)
2501 l = len;
2502 flags = page_get_flags(page);
2503 if (!(flags & PAGE_VALID))
2504 return -1;
2505 if (is_write) {
2506 if (!(flags & PAGE_WRITE))
2507 return -1;
2508 /* XXX: this code should not depend on lock_user */
2509 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2510 return -1;
2511 memcpy(p, buf, l);
2512 unlock_user(p, addr, l);
2513 } else {
2514 if (!(flags & PAGE_READ))
2515 return -1;
2516 /* XXX: this code should not depend on lock_user */
2517 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2518 return -1;
2519 memcpy(buf, p, l);
2520 unlock_user(p, addr, 0);
2522 len -= l;
2523 buf += l;
2524 addr += l;
2526 return 0;
2529 #else
2531 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2532 hwaddr length)
2534 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2535 addr += memory_region_get_ram_addr(mr);
2537 /* No early return if dirty_log_mask is or becomes 0, because
2538 * cpu_physical_memory_set_dirty_range will still call
2539 * xen_modified_memory. */
2541 if (dirty_log_mask) {
2542 dirty_log_mask =
2543 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2545 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2546 tb_lock();
2547 tb_invalidate_phys_range(addr, addr + length);
2548 tb_unlock();
2549 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2551 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2554 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2556 unsigned access_size_max = mr->ops->valid.max_access_size;
2558 /* Regions are assumed to support 1-4 byte accesses unless
2559 otherwise specified. */
2560 if (access_size_max == 0) {
2561 access_size_max = 4;
2564 /* Bound the maximum access by the alignment of the address. */
2565 if (!mr->ops->impl.unaligned) {
2566 unsigned align_size_max = addr & -addr;
2567 if (align_size_max != 0 && align_size_max < access_size_max) {
2568 access_size_max = align_size_max;
2572 /* Don't attempt accesses larger than the maximum. */
2573 if (l > access_size_max) {
2574 l = access_size_max;
2576 l = pow2floor(l);
2578 return l;
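/*
 * Worked example (illustrative): a region with valid.max_access_size == 0 is
 * treated as supporting up to 4-byte accesses. For an access at addr == 0x1002
 * the alignment bound is 0x1002 & -0x1002 == 2, which lowers access_size_max
 * from 4 to 2, so a requested l of 8 is clamped to 2 and pow2floor() leaves it
 * unchanged.
 */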
2581 static bool prepare_mmio_access(MemoryRegion *mr)
2583 bool unlocked = !qemu_mutex_iothread_locked();
2584 bool release_lock = false;
2586 if (unlocked && mr->global_locking) {
2587 qemu_mutex_lock_iothread();
2588 unlocked = false;
2589 release_lock = true;
2591 if (mr->flush_coalesced_mmio) {
2592 if (unlocked) {
2593 qemu_mutex_lock_iothread();
2595 qemu_flush_coalesced_mmio_buffer();
2596 if (unlocked) {
2597 qemu_mutex_unlock_iothread();
2601 return release_lock;
2604 /* Called within RCU critical section. */
2605 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2606 MemTxAttrs attrs,
2607 const uint8_t *buf,
2608 int len, hwaddr addr1,
2609 hwaddr l, MemoryRegion *mr)
2611 uint8_t *ptr;
2612 uint64_t val;
2613 MemTxResult result = MEMTX_OK;
2614 bool release_lock = false;
2616 for (;;) {
2617 if (!memory_access_is_direct(mr, true)) {
2618 release_lock |= prepare_mmio_access(mr);
2619 l = memory_access_size(mr, l, addr1);
2620 /* XXX: could force current_cpu to NULL to avoid
2621 potential bugs */
2622 switch (l) {
2623 case 8:
2624 /* 64 bit write access */
2625 val = ldq_p(buf);
2626 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2627 attrs);
2628 break;
2629 case 4:
2630 /* 32 bit write access */
2631 val = ldl_p(buf);
2632 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2633 attrs);
2634 break;
2635 case 2:
2636 /* 16 bit write access */
2637 val = lduw_p(buf);
2638 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2639 attrs);
2640 break;
2641 case 1:
2642 /* 8 bit write access */
2643 val = ldub_p(buf);
2644 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2645 attrs);
2646 break;
2647 default:
2648 abort();
2650 } else {
2651 /* RAM case */
2652 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2653 memcpy(ptr, buf, l);
2654 invalidate_and_set_dirty(mr, addr1, l);
2657 if (release_lock) {
2658 qemu_mutex_unlock_iothread();
2659 release_lock = false;
2662 len -= l;
2663 buf += l;
2664 addr += l;
2666 if (!len) {
2667 break;
2670 l = len;
2671 mr = address_space_translate(as, addr, &addr1, &l, true);
2674 return result;
2677 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2678 const uint8_t *buf, int len)
2680 hwaddr l;
2681 hwaddr addr1;
2682 MemoryRegion *mr;
2683 MemTxResult result = MEMTX_OK;
2685 if (len > 0) {
2686 rcu_read_lock();
2687 l = len;
2688 mr = address_space_translate(as, addr, &addr1, &l, true);
2689 result = address_space_write_continue(as, addr, attrs, buf, len,
2690 addr1, l, mr);
2691 rcu_read_unlock();
2694 return result;
2697 /* Called within RCU critical section. */
2698 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2699 MemTxAttrs attrs, uint8_t *buf,
2700 int len, hwaddr addr1, hwaddr l,
2701 MemoryRegion *mr)
2703 uint8_t *ptr;
2704 uint64_t val;
2705 MemTxResult result = MEMTX_OK;
2706 bool release_lock = false;
2708 for (;;) {
2709 if (!memory_access_is_direct(mr, false)) {
2710 /* I/O case */
2711 release_lock |= prepare_mmio_access(mr);
2712 l = memory_access_size(mr, l, addr1);
2713 switch (l) {
2714 case 8:
2715 /* 64 bit read access */
2716 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2717 attrs);
2718 stq_p(buf, val);
2719 break;
2720 case 4:
2721 /* 32 bit read access */
2722 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2723 attrs);
2724 stl_p(buf, val);
2725 break;
2726 case 2:
2727 /* 16 bit read access */
2728 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2729 attrs);
2730 stw_p(buf, val);
2731 break;
2732 case 1:
2733 /* 8 bit read access */
2734 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2735 attrs);
2736 stb_p(buf, val);
2737 break;
2738 default:
2739 abort();
2741 } else {
2742 /* RAM case */
2743 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2744 memcpy(buf, ptr, l);
2747 if (release_lock) {
2748 qemu_mutex_unlock_iothread();
2749 release_lock = false;
2752 len -= l;
2753 buf += l;
2754 addr += l;
2756 if (!len) {
2757 break;
2760 l = len;
2761 mr = address_space_translate(as, addr, &addr1, &l, false);
2764 return result;
2767 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2768 MemTxAttrs attrs, uint8_t *buf, int len)
2770 hwaddr l;
2771 hwaddr addr1;
2772 MemoryRegion *mr;
2773 MemTxResult result = MEMTX_OK;
2775 if (len > 0) {
2776 rcu_read_lock();
2777 l = len;
2778 mr = address_space_translate(as, addr, &addr1, &l, false);
2779 result = address_space_read_continue(as, addr, attrs, buf, len,
2780 addr1, l, mr);
2781 rcu_read_unlock();
2784 return result;
2787 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2788 uint8_t *buf, int len, bool is_write)
2790 if (is_write) {
2791 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2792 } else {
2793 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2797 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2798 int len, int is_write)
2800 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2801 buf, len, is_write);
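/*
 * Illustrative sketch: copying a small buffer to and from guest physical
 * memory through this convenience wrapper; device models would normally use
 * address_space_rw() with explicit attributes instead. "GUEST_PADDR" is a
 * hypothetical address chosen by the caller.
 *
 *     uint8_t blob[16] = { 0 };
 *     cpu_physical_memory_rw(GUEST_PADDR, blob, sizeof(blob), 1);  // write
 *     cpu_physical_memory_rw(GUEST_PADDR, blob, sizeof(blob), 0);  // read back
 */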
2804 enum write_rom_type {
2805 WRITE_DATA,
2806 FLUSH_CACHE,
2809 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2810 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2812 hwaddr l;
2813 uint8_t *ptr;
2814 hwaddr addr1;
2815 MemoryRegion *mr;
2817 rcu_read_lock();
2818 while (len > 0) {
2819 l = len;
2820 mr = address_space_translate(as, addr, &addr1, &l, true);
2822 if (!(memory_region_is_ram(mr) ||
2823 memory_region_is_romd(mr))) {
2824 l = memory_access_size(mr, l, addr1);
2825 } else {
2826 /* ROM/RAM case */
2827 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2828 switch (type) {
2829 case WRITE_DATA:
2830 memcpy(ptr, buf, l);
2831 invalidate_and_set_dirty(mr, addr1, l);
2832 break;
2833 case FLUSH_CACHE:
2834 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2835 break;
2838 len -= l;
2839 buf += l;
2840 addr += l;
2842 rcu_read_unlock();
2845 /* used for ROM loading: can write in RAM and ROM */
2846 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2847 const uint8_t *buf, int len)
2849 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
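/*
 * Illustrative sketch: a board model loading a firmware image into a region
 * that may be ROM, where a plain address_space_write() would be refused.
 * "ROM_BASE", "blob" and "blob_size" are hypothetical.
 *
 *     cpu_physical_memory_write_rom(&address_space_memory, ROM_BASE,
 *                                   blob, blob_size);
 */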
2852 void cpu_flush_icache_range(hwaddr start, int len)
2855 /* This function should do the same thing as an icache flush that was
2856 * triggered from within the guest. For TCG we are always cache coherent,
2857 * so there is no need to flush anything. For KVM / Xen we need to flush
2858 * the host's instruction cache at least. */
2860 if (tcg_enabled()) {
2861 return;
2864 cpu_physical_memory_write_rom_internal(&address_space_memory,
2865 start, NULL, len, FLUSH_CACHE);
2868 typedef struct {
2869 MemoryRegion *mr;
2870 void *buffer;
2871 hwaddr addr;
2872 hwaddr len;
2873 bool in_use;
2874 } BounceBuffer;
2876 static BounceBuffer bounce;
2878 typedef struct MapClient {
2879 QEMUBH *bh;
2880 QLIST_ENTRY(MapClient) link;
2881 } MapClient;
2883 QemuMutex map_client_list_lock;
2884 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2885 = QLIST_HEAD_INITIALIZER(map_client_list);
2887 static void cpu_unregister_map_client_do(MapClient *client)
2889 QLIST_REMOVE(client, link);
2890 g_free(client);
2893 static void cpu_notify_map_clients_locked(void)
2895 MapClient *client;
2897 while (!QLIST_EMPTY(&map_client_list)) {
2898 client = QLIST_FIRST(&map_client_list);
2899 qemu_bh_schedule(client->bh);
2900 cpu_unregister_map_client_do(client);
2904 void cpu_register_map_client(QEMUBH *bh)
2906 MapClient *client = g_malloc(sizeof(*client));
2908 qemu_mutex_lock(&map_client_list_lock);
2909 client->bh = bh;
2910 QLIST_INSERT_HEAD(&map_client_list, client, link);
2911 if (!atomic_read(&bounce.in_use)) {
2912 cpu_notify_map_clients_locked();
2914 qemu_mutex_unlock(&map_client_list_lock);
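/*
 * Illustrative sketch of the retry protocol (assumption, not code from this
 * file): a caller whose address_space_map() returned NULL because the bounce
 * buffer was busy registers a bottom half and retries when it runs.
 * "dma_retry" and "opaque" are hypothetical.
 *
 *     static void dma_retry(void *opaque)
 *     {
 *         // the bounce buffer has been released; try address_space_map() again
 *     }
 *
 *     QEMUBH *bh = qemu_bh_new(dma_retry, opaque);
 *     cpu_register_map_client(bh);
 */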
2917 void cpu_exec_init_all(void)
2919 qemu_mutex_init(&ram_list.mutex);
2920 /* The data structures we set up here depend on knowing the page size,
2921 * so no more changes can be made after this point.
2922 * In an ideal world, nothing we did before we had finished the
2923 * machine setup would care about the target page size, and we could
2924 * do this much later, rather than requiring board models to state
2925 * up front what their requirements are. */
2927 finalize_target_page_bits();
2928 io_mem_init();
2929 memory_map_init();
2930 qemu_mutex_init(&map_client_list_lock);
2933 void cpu_unregister_map_client(QEMUBH *bh)
2935 MapClient *client;
2937 qemu_mutex_lock(&map_client_list_lock);
2938 QLIST_FOREACH(client, &map_client_list, link) {
2939 if (client->bh == bh) {
2940 cpu_unregister_map_client_do(client);
2941 break;
2944 qemu_mutex_unlock(&map_client_list_lock);
2947 static void cpu_notify_map_clients(void)
2949 qemu_mutex_lock(&map_client_list_lock);
2950 cpu_notify_map_clients_locked();
2951 qemu_mutex_unlock(&map_client_list_lock);
2954 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2956 MemoryRegion *mr;
2957 hwaddr l, xlat;
2959 rcu_read_lock();
2960 while (len > 0) {
2961 l = len;
2962 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2963 if (!memory_access_is_direct(mr, is_write)) {
2964 l = memory_access_size(mr, l, addr);
2965 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2966 return false;
2970 len -= l;
2971 addr += l;
2973 rcu_read_unlock();
2974 return true;
2977 /* Map a physical memory region into a host virtual address.
2978 * May map a subset of the requested range, given by and returned in *plen.
2979 * May return NULL if resources needed to perform the mapping are exhausted.
2980 * Use only for reads OR writes - not for read-modify-write operations.
2981 * Use cpu_register_map_client() to know when retrying the map operation is
2982 * likely to succeed. */
2984 void *address_space_map(AddressSpace *as,
2985 hwaddr addr,
2986 hwaddr *plen,
2987 bool is_write)
2989 hwaddr len = *plen;
2990 hwaddr done = 0;
2991 hwaddr l, xlat, base;
2992 MemoryRegion *mr, *this_mr;
2993 void *ptr;
2995 if (len == 0) {
2996 return NULL;
2999 l = len;
3000 rcu_read_lock();
3001 mr = address_space_translate(as, addr, &xlat, &l, is_write);
3003 if (!memory_access_is_direct(mr, is_write)) {
3004 if (atomic_xchg(&bounce.in_use, true)) {
3005 rcu_read_unlock();
3006 return NULL;
3008 /* Avoid unbounded allocations */
3009 l = MIN(l, TARGET_PAGE_SIZE);
3010 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
3011 bounce.addr = addr;
3012 bounce.len = l;
3014 memory_region_ref(mr);
3015 bounce.mr = mr;
3016 if (!is_write) {
3017 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
3018 bounce.buffer, l);
3021 rcu_read_unlock();
3022 *plen = l;
3023 return bounce.buffer;
3026 base = xlat;
3028 for (;;) {
3029 len -= l;
3030 addr += l;
3031 done += l;
3032 if (len == 0) {
3033 break;
3036 l = len;
3037 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3038 if (this_mr != mr || xlat != base + done) {
3039 break;
3043 memory_region_ref(mr);
3044 *plen = done;
3045 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
3046 rcu_read_unlock();
3048 return ptr;
3051 /* Unmaps a memory region previously mapped by address_space_map().
3052 * Will also mark the memory as dirty if is_write == 1. access_len gives
3053 * the amount of memory that was actually read or written by the caller. */
3055 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3056 int is_write, hwaddr access_len)
3058 if (buffer != bounce.buffer) {
3059 MemoryRegion *mr;
3060 ram_addr_t addr1;
3062 mr = memory_region_from_host(buffer, &addr1);
3063 assert(mr != NULL);
3064 if (is_write) {
3065 invalidate_and_set_dirty(mr, addr1, access_len);
3067 if (xen_enabled()) {
3068 xen_invalidate_map_cache_entry(buffer);
3070 memory_region_unref(mr);
3071 return;
3073 if (is_write) {
3074 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3075 bounce.buffer, access_len);
3077 qemu_vfree(bounce.buffer);
3078 bounce.buffer = NULL;
3079 memory_region_unref(bounce.mr);
3080 atomic_mb_set(&bounce.in_use, false);
3081 cpu_notify_map_clients();
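/*
 * Illustrative sketch (hypothetical caller, not part of this file): a typical
 * zero-copy read using the map/unmap pair above. Only *plen bytes are
 * guaranteed to be mapped, which may be less than the length requested.
 *
 *     hwaddr plen = len;
 *     void *p = address_space_map(as, addr, &plen, false);
 *     if (!p) {
 *         // resources exhausted; see cpu_register_map_client() above
 *         return;
 *     }
 *     // ... consume up to plen bytes at p ...
 *     address_space_unmap(as, p, plen, false, plen);
 */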
3084 void *cpu_physical_memory_map(hwaddr addr,
3085 hwaddr *plen,
3086 int is_write)
3088 return address_space_map(&address_space_memory, addr, plen, is_write);
3091 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3092 int is_write, hwaddr access_len)
3094 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3097 /* warning: addr must be aligned */
3098 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3099 MemTxAttrs attrs,
3100 MemTxResult *result,
3101 enum device_endian endian)
3103 uint8_t *ptr;
3104 uint64_t val;
3105 MemoryRegion *mr;
3106 hwaddr l = 4;
3107 hwaddr addr1;
3108 MemTxResult r;
3109 bool release_lock = false;
3111 rcu_read_lock();
3112 mr = address_space_translate(as, addr, &addr1, &l, false);
3113 if (l < 4 || !memory_access_is_direct(mr, false)) {
3114 release_lock |= prepare_mmio_access(mr);
3116 /* I/O case */
3117 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3118 #if defined(TARGET_WORDS_BIGENDIAN)
3119 if (endian == DEVICE_LITTLE_ENDIAN) {
3120 val = bswap32(val);
3122 #else
3123 if (endian == DEVICE_BIG_ENDIAN) {
3124 val = bswap32(val);
3126 #endif
3127 } else {
3128 /* RAM case */
3129 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3130 switch (endian) {
3131 case DEVICE_LITTLE_ENDIAN:
3132 val = ldl_le_p(ptr);
3133 break;
3134 case DEVICE_BIG_ENDIAN:
3135 val = ldl_be_p(ptr);
3136 break;
3137 default:
3138 val = ldl_p(ptr);
3139 break;
3141 r = MEMTX_OK;
3143 if (result) {
3144 *result = r;
3146 if (release_lock) {
3147 qemu_mutex_unlock_iothread();
3149 rcu_read_unlock();
3150 return val;
3153 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3154 MemTxAttrs attrs, MemTxResult *result)
3156 return address_space_ldl_internal(as, addr, attrs, result,
3157 DEVICE_NATIVE_ENDIAN);
3160 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3161 MemTxAttrs attrs, MemTxResult *result)
3163 return address_space_ldl_internal(as, addr, attrs, result,
3164 DEVICE_LITTLE_ENDIAN);
3167 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3168 MemTxAttrs attrs, MemTxResult *result)
3170 return address_space_ldl_internal(as, addr, attrs, result,
3171 DEVICE_BIG_ENDIAN);
3174 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3176 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3179 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3181 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3184 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3186 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
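/*
 * Illustrative sketch: reading a 32-bit little-endian value from guest
 * physical memory, with and without transaction-result reporting.
 * "GUEST_PADDR" is hypothetical.
 *
 *     MemTxResult res;
 *     uint32_t v = address_space_ldl_le(&address_space_memory, GUEST_PADDR,
 *                                       MEMTXATTRS_UNSPECIFIED, &res);
 *     uint32_t w = ldl_le_phys(&address_space_memory, GUEST_PADDR);
 */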
3189 /* warning: addr must be aligned */
3190 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3191 MemTxAttrs attrs,
3192 MemTxResult *result,
3193 enum device_endian endian)
3195 uint8_t *ptr;
3196 uint64_t val;
3197 MemoryRegion *mr;
3198 hwaddr l = 8;
3199 hwaddr addr1;
3200 MemTxResult r;
3201 bool release_lock = false;
3203 rcu_read_lock();
3204 mr = address_space_translate(as, addr, &addr1, &l,
3205 false);
3206 if (l < 8 || !memory_access_is_direct(mr, false)) {
3207 release_lock |= prepare_mmio_access(mr);
3209 /* I/O case */
3210 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3211 #if defined(TARGET_WORDS_BIGENDIAN)
3212 if (endian == DEVICE_LITTLE_ENDIAN) {
3213 val = bswap64(val);
3215 #else
3216 if (endian == DEVICE_BIG_ENDIAN) {
3217 val = bswap64(val);
3219 #endif
3220 } else {
3221 /* RAM case */
3222 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3223 switch (endian) {
3224 case DEVICE_LITTLE_ENDIAN:
3225 val = ldq_le_p(ptr);
3226 break;
3227 case DEVICE_BIG_ENDIAN:
3228 val = ldq_be_p(ptr);
3229 break;
3230 default:
3231 val = ldq_p(ptr);
3232 break;
3234 r = MEMTX_OK;
3236 if (result) {
3237 *result = r;
3239 if (release_lock) {
3240 qemu_mutex_unlock_iothread();
3242 rcu_read_unlock();
3243 return val;
3246 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3247 MemTxAttrs attrs, MemTxResult *result)
3249 return address_space_ldq_internal(as, addr, attrs, result,
3250 DEVICE_NATIVE_ENDIAN);
3253 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3254 MemTxAttrs attrs, MemTxResult *result)
3256 return address_space_ldq_internal(as, addr, attrs, result,
3257 DEVICE_LITTLE_ENDIAN);
3260 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3261 MemTxAttrs attrs, MemTxResult *result)
3263 return address_space_ldq_internal(as, addr, attrs, result,
3264 DEVICE_BIG_ENDIAN);
3267 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3269 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3272 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3274 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3277 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3279 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3282 /* XXX: optimize */
3283 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3284 MemTxAttrs attrs, MemTxResult *result)
3286 uint8_t val;
3287 MemTxResult r;
3289 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3290 if (result) {
3291 *result = r;
3293 return val;
3296 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3298 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3301 /* warning: addr must be aligned */
3302 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3303 hwaddr addr,
3304 MemTxAttrs attrs,
3305 MemTxResult *result,
3306 enum device_endian endian)
3308 uint8_t *ptr;
3309 uint64_t val;
3310 MemoryRegion *mr;
3311 hwaddr l = 2;
3312 hwaddr addr1;
3313 MemTxResult r;
3314 bool release_lock = false;
3316 rcu_read_lock();
3317 mr = address_space_translate(as, addr, &addr1, &l,
3318 false);
3319 if (l < 2 || !memory_access_is_direct(mr, false)) {
3320 release_lock |= prepare_mmio_access(mr);
3322 /* I/O case */
3323 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3324 #if defined(TARGET_WORDS_BIGENDIAN)
3325 if (endian == DEVICE_LITTLE_ENDIAN) {
3326 val = bswap16(val);
3328 #else
3329 if (endian == DEVICE_BIG_ENDIAN) {
3330 val = bswap16(val);
3332 #endif
3333 } else {
3334 /* RAM case */
3335 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3336 switch (endian) {
3337 case DEVICE_LITTLE_ENDIAN:
3338 val = lduw_le_p(ptr);
3339 break;
3340 case DEVICE_BIG_ENDIAN:
3341 val = lduw_be_p(ptr);
3342 break;
3343 default:
3344 val = lduw_p(ptr);
3345 break;
3347 r = MEMTX_OK;
3349 if (result) {
3350 *result = r;
3352 if (release_lock) {
3353 qemu_mutex_unlock_iothread();
3355 rcu_read_unlock();
3356 return val;
3359 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3360 MemTxAttrs attrs, MemTxResult *result)
3362 return address_space_lduw_internal(as, addr, attrs, result,
3363 DEVICE_NATIVE_ENDIAN);
3366 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3367 MemTxAttrs attrs, MemTxResult *result)
3369 return address_space_lduw_internal(as, addr, attrs, result,
3370 DEVICE_LITTLE_ENDIAN);
3373 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3374 MemTxAttrs attrs, MemTxResult *result)
3376 return address_space_lduw_internal(as, addr, attrs, result,
3377 DEVICE_BIG_ENDIAN);
3380 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3382 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3385 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3387 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3390 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3392 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3395 /* warning: addr must be aligned. The ram page is not masked as dirty
3396 and the code inside is not invalidated. It is useful if the dirty
3397 bits are used to track modified PTEs */
3398 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3399 MemTxAttrs attrs, MemTxResult *result)
3401 uint8_t *ptr;
3402 MemoryRegion *mr;
3403 hwaddr l = 4;
3404 hwaddr addr1;
3405 MemTxResult r;
3406 uint8_t dirty_log_mask;
3407 bool release_lock = false;
3409 rcu_read_lock();
3410 mr = address_space_translate(as, addr, &addr1, &l,
3411 true);
3412 if (l < 4 || !memory_access_is_direct(mr, true)) {
3413 release_lock |= prepare_mmio_access(mr);
3415 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3416 } else {
3417 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3418 stl_p(ptr, val);
3420 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3421 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3422 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3423 4, dirty_log_mask);
3424 r = MEMTX_OK;
3426 if (result) {
3427 *result = r;
3429 if (release_lock) {
3430 qemu_mutex_unlock_iothread();
3432 rcu_read_unlock();
3435 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3437 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
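/*
 * Illustrative sketch: a target MMU helper setting the dirty bit in a guest
 * page-table entry without flagging the page for code invalidation, which is
 * what the notdirty variant is intended for. "as", "pte_addr" and "PTE_DIRTY"
 * are hypothetical target-specific names.
 *
 *     uint32_t pte = ldl_phys(as, pte_addr);
 *     stl_phys_notdirty(as, pte_addr, pte | PTE_DIRTY);
 */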
3440 /* warning: addr must be aligned */
3441 static inline void address_space_stl_internal(AddressSpace *as,
3442 hwaddr addr, uint32_t val,
3443 MemTxAttrs attrs,
3444 MemTxResult *result,
3445 enum device_endian endian)
3447 uint8_t *ptr;
3448 MemoryRegion *mr;
3449 hwaddr l = 4;
3450 hwaddr addr1;
3451 MemTxResult r;
3452 bool release_lock = false;
3454 rcu_read_lock();
3455 mr = address_space_translate(as, addr, &addr1, &l,
3456 true);
3457 if (l < 4 || !memory_access_is_direct(mr, true)) {
3458 release_lock |= prepare_mmio_access(mr);
3460 #if defined(TARGET_WORDS_BIGENDIAN)
3461 if (endian == DEVICE_LITTLE_ENDIAN) {
3462 val = bswap32(val);
3464 #else
3465 if (endian == DEVICE_BIG_ENDIAN) {
3466 val = bswap32(val);
3468 #endif
3469 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3470 } else {
3471 /* RAM case */
3472 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3473 switch (endian) {
3474 case DEVICE_LITTLE_ENDIAN:
3475 stl_le_p(ptr, val);
3476 break;
3477 case DEVICE_BIG_ENDIAN:
3478 stl_be_p(ptr, val);
3479 break;
3480 default:
3481 stl_p(ptr, val);
3482 break;
3484 invalidate_and_set_dirty(mr, addr1, 4);
3485 r = MEMTX_OK;
3487 if (result) {
3488 *result = r;
3490 if (release_lock) {
3491 qemu_mutex_unlock_iothread();
3493 rcu_read_unlock();
3496 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3497 MemTxAttrs attrs, MemTxResult *result)
3499 address_space_stl_internal(as, addr, val, attrs, result,
3500 DEVICE_NATIVE_ENDIAN);
3503 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3504 MemTxAttrs attrs, MemTxResult *result)
3506 address_space_stl_internal(as, addr, val, attrs, result,
3507 DEVICE_LITTLE_ENDIAN);
3510 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3511 MemTxAttrs attrs, MemTxResult *result)
3513 address_space_stl_internal(as, addr, val, attrs, result,
3514 DEVICE_BIG_ENDIAN);
3517 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3519 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3522 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3524 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3527 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3529 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3532 /* XXX: optimize */
3533 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3534 MemTxAttrs attrs, MemTxResult *result)
3536 uint8_t v = val;
3537 MemTxResult r;
3539 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3540 if (result) {
3541 *result = r;
3545 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3547 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3550 /* warning: addr must be aligned */
3551 static inline void address_space_stw_internal(AddressSpace *as,
3552 hwaddr addr, uint32_t val,
3553 MemTxAttrs attrs,
3554 MemTxResult *result,
3555 enum device_endian endian)
3557 uint8_t *ptr;
3558 MemoryRegion *mr;
3559 hwaddr l = 2;
3560 hwaddr addr1;
3561 MemTxResult r;
3562 bool release_lock = false;
3564 rcu_read_lock();
3565 mr = address_space_translate(as, addr, &addr1, &l, true);
3566 if (l < 2 || !memory_access_is_direct(mr, true)) {
3567 release_lock |= prepare_mmio_access(mr);
3569 #if defined(TARGET_WORDS_BIGENDIAN)
3570 if (endian == DEVICE_LITTLE_ENDIAN) {
3571 val = bswap16(val);
3573 #else
3574 if (endian == DEVICE_BIG_ENDIAN) {
3575 val = bswap16(val);
3577 #endif
3578 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3579 } else {
3580 /* RAM case */
3581 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3582 switch (endian) {
3583 case DEVICE_LITTLE_ENDIAN:
3584 stw_le_p(ptr, val);
3585 break;
3586 case DEVICE_BIG_ENDIAN:
3587 stw_be_p(ptr, val);
3588 break;
3589 default:
3590 stw_p(ptr, val);
3591 break;
3593 invalidate_and_set_dirty(mr, addr1, 2);
3594 r = MEMTX_OK;
3596 if (result) {
3597 *result = r;
3599 if (release_lock) {
3600 qemu_mutex_unlock_iothread();
3602 rcu_read_unlock();
3605 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3606 MemTxAttrs attrs, MemTxResult *result)
3608 address_space_stw_internal(as, addr, val, attrs, result,
3609 DEVICE_NATIVE_ENDIAN);
3612 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3613 MemTxAttrs attrs, MemTxResult *result)
3615 address_space_stw_internal(as, addr, val, attrs, result,
3616 DEVICE_LITTLE_ENDIAN);
3619 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3620 MemTxAttrs attrs, MemTxResult *result)
3622 address_space_stw_internal(as, addr, val, attrs, result,
3623 DEVICE_BIG_ENDIAN);
3626 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3628 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3631 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3633 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3636 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3638 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3641 /* XXX: optimize */
3642 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3643 MemTxAttrs attrs, MemTxResult *result)
3645 MemTxResult r;
3646 val = tswap64(val);
3647 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3648 if (result) {
3649 *result = r;
3653 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3654 MemTxAttrs attrs, MemTxResult *result)
3656 MemTxResult r;
3657 val = cpu_to_le64(val);
3658 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3659 if (result) {
3660 *result = r;
3663 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3664 MemTxAttrs attrs, MemTxResult *result)
3666 MemTxResult r;
3667 val = cpu_to_be64(val);
3668 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3669 if (result) {
3670 *result = r;
3674 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3676 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3679 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3681 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3684 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3686 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3689 /* virtual memory access for debug (includes writing to ROM) */
3690 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3691 uint8_t *buf, int len, int is_write)
3693 int l;
3694 hwaddr phys_addr;
3695 target_ulong page;
3697 while (len > 0) {
3698 int asidx;
3699 MemTxAttrs attrs;
3701 page = addr & TARGET_PAGE_MASK;
3702 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3703 asidx = cpu_asidx_from_attrs(cpu, attrs);
3704 /* if no physical page mapped, return an error */
3705 if (phys_addr == -1)
3706 return -1;
3707 l = (page + TARGET_PAGE_SIZE) - addr;
3708 if (l > len)
3709 l = len;
3710 phys_addr += (addr & ~TARGET_PAGE_MASK);
3711 if (is_write) {
3712 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3713 phys_addr, buf, l);
3714 } else {
3715 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3716 MEMTXATTRS_UNSPECIFIED,
3717 buf, l, 0);
3719 len -= l;
3720 buf += l;
3721 addr += l;
3723 return 0;
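/*
 * Illustrative sketch: how a debug front end (for example the gdbstub) reads
 * guest virtual memory through this helper. "cpu", "vaddr" and the buffer are
 * hypothetical.
 *
 *     uint8_t buf[64];
 *     if (cpu_memory_rw_debug(cpu, vaddr, buf, sizeof(buf), 0) < 0) {
 *         // no physical page is mapped at vaddr
 *     }
 */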
3727 /* Allows code that needs to deal with migration bitmaps etc to still be built
3728 * target independent. */
3730 size_t qemu_target_page_bits(void)
3732 return TARGET_PAGE_BITS;
3735 #endif
3738 /* A helper function for the _utterly broken_ virtio device model to find out if
3739 * it's running on a big endian machine. Don't do this at home kids! */
3741 bool target_words_bigendian(void);
3742 bool target_words_bigendian(void)
3744 #if defined(TARGET_WORDS_BIGENDIAN)
3745 return true;
3746 #else
3747 return false;
3748 #endif
3751 #ifndef CONFIG_USER_ONLY
3752 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3754 MemoryRegion *mr;
3755 hwaddr l = 1;
3756 bool res;
3758 rcu_read_lock();
3759 mr = address_space_translate(&address_space_memory,
3760 phys_addr, &phys_addr, &l, false);
3762 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3763 rcu_read_unlock();
3764 return res;
3767 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3769 RAMBlock *block;
3770 int ret = 0;
3772 rcu_read_lock();
3773 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3774 ret = func(block->idstr, block->host, block->offset,
3775 block->used_length, opaque);
3776 if (ret) {
3777 break;
3780 rcu_read_unlock();
3781 return ret;
3783 #endif
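/*
 * Illustrative sketch (the callback signature is an assumption inferred from
 * the call above): summing the used length of every RAMBlock.
 *
 *     static int add_block_size(const char *name, void *host, ram_addr_t offset,
 *                               ram_addr_t length, void *opaque)
 *     {
 *         *(ram_addr_t *)opaque += length;
 *         return 0;   // keep iterating
 *     }
 *
 *     ram_addr_t total = 0;
 *     qemu_ram_foreach_block(add_block_size, &total);
 */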