1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include "qemu.h"
40 #else /* !CONFIG_USER_ONLY */
41 #include "hw/hw.h"
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57 #include "exec/log.h"
59 #include "migration/vmstate.h"
61 #include "qemu/range.h"
62 #ifndef _WIN32
63 #include "qemu/mmap-alloc.h"
64 #endif
66 //#define DEBUG_SUBPAGE
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock.
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots.
92 #define RAM_RESIZEABLE (1 << 2)
94 #endif
96 #ifdef TARGET_PAGE_BITS_VARY
97 int target_page_bits;
98 bool target_page_bits_decided;
99 #endif
101 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
102 /* current CPU in the current thread. It is only valid inside
103 cpu_exec() */
104 __thread CPUState *current_cpu;
105 /* 0 = Do not count executed instructions.
106 1 = Precise instruction counting.
107 2 = Adaptive rate instruction counting. */
108 int use_icount;
110 bool set_preferred_target_page_bits(int bits)
112 /* The target page size is the lowest common denominator for all
113 * the CPUs in the system, so we can only make it smaller, never
114 * larger. And we can't make it smaller once we've committed to
115 * a particular size.
117 #ifdef TARGET_PAGE_BITS_VARY
118 assert(bits >= TARGET_PAGE_BITS_MIN);
119 if (target_page_bits == 0 || target_page_bits > bits) {
120 if (target_page_bits_decided) {
121 return false;
123 target_page_bits = bits;
125 #endif
126 return true;
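/* Example (illustrative values): if one CPU model requests 16 (64 KiB pages)
 * and a later one requests 12 (4 KiB pages), the shared target page size
 * settles at 4 KiB. Once finalize_target_page_bits() has run and
 * target_page_bits_decided is set, any further attempt to shrink the page
 * size returns false.
 */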
129 #if !defined(CONFIG_USER_ONLY)
131 static void finalize_target_page_bits(void)
133 #ifdef TARGET_PAGE_BITS_VARY
134 if (target_page_bits == 0) {
135 target_page_bits = TARGET_PAGE_BITS_MIN;
137 target_page_bits_decided = true;
138 #endif
141 typedef struct PhysPageEntry PhysPageEntry;
143 struct PhysPageEntry {
144 /* How many levels are skipped when following ptr (each level covers P_L2_BITS address bits); 0 for a leaf. */
145 uint32_t skip : 6;
146 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
147 uint32_t ptr : 26;
150 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
152 /* Size of the L2 (and L3, etc) page tables. */
153 #define ADDR_SPACE_BITS 64
155 #define P_L2_BITS 9
156 #define P_L2_SIZE (1 << P_L2_BITS)
158 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
160 typedef PhysPageEntry Node[P_L2_SIZE];
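/* Worked example, assuming a 4 KiB target page (TARGET_PAGE_BITS == 12):
 *   P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = (51 / 9) + 1 = 6
 * so a lookup walks at most six 512-entry (P_L2_SIZE) nodes, indexed by
 * successive 9-bit slices of the page number, before reaching a leaf that
 * holds a phys_sections index.
 */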
162 typedef struct PhysPageMap {
163 struct rcu_head rcu;
165 unsigned sections_nb;
166 unsigned sections_nb_alloc;
167 unsigned nodes_nb;
168 unsigned nodes_nb_alloc;
169 Node *nodes;
170 MemoryRegionSection *sections;
171 } PhysPageMap;
173 struct AddressSpaceDispatch {
174 struct rcu_head rcu;
176 MemoryRegionSection *mru_section;
177 /* This is a multi-level map on the physical address space.
178 * The bottom level has pointers to MemoryRegionSections.
180 PhysPageEntry phys_map;
181 PhysPageMap map;
182 AddressSpace *as;
185 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
186 typedef struct subpage_t {
187 MemoryRegion iomem;
188 AddressSpace *as;
189 hwaddr base;
190 uint16_t sub_section[];
191 } subpage_t;
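/* Example, assuming a 4 KiB target page: SUBPAGE_IDX(0x1234) == 0x234, i.e.
 * the byte offset within the page; sub_section[] holds one section index per
 * byte of the page (see subpage_init() and subpage_register() below).
 */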
193 #define PHYS_SECTION_UNASSIGNED 0
194 #define PHYS_SECTION_NOTDIRTY 1
195 #define PHYS_SECTION_ROM 2
196 #define PHYS_SECTION_WATCH 3
198 static void io_mem_init(void);
199 static void memory_map_init(void);
200 static void tcg_commit(MemoryListener *listener);
202 static MemoryRegion io_mem_watch;
205 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
206 * @cpu: the CPU whose AddressSpace this is
207 * @as: the AddressSpace itself
208 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
209 * @tcg_as_listener: listener for tracking changes to the AddressSpace
211 struct CPUAddressSpace {
212 CPUState *cpu;
213 AddressSpace *as;
214 struct AddressSpaceDispatch *memory_dispatch;
215 MemoryListener tcg_as_listener;
218 #endif
220 #if !defined(CONFIG_USER_ONLY)
222 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
224 static unsigned alloc_hint = 16;
225 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
226 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
227 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
228 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
229 alloc_hint = map->nodes_nb_alloc;
233 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
235 unsigned i;
236 uint32_t ret;
237 PhysPageEntry e;
238 PhysPageEntry *p;
240 ret = map->nodes_nb++;
241 p = map->nodes[ret];
242 assert(ret != PHYS_MAP_NODE_NIL);
243 assert(ret != map->nodes_nb_alloc);
245 e.skip = leaf ? 0 : 1;
246 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
247 for (i = 0; i < P_L2_SIZE; ++i) {
248 memcpy(&p[i], &e, sizeof(e));
250 return ret;
253 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
254 hwaddr *index, hwaddr *nb, uint16_t leaf,
255 int level)
257 PhysPageEntry *p;
258 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
260 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
261 lp->ptr = phys_map_node_alloc(map, level == 0);
263 p = map->nodes[lp->ptr];
264 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
266 while (*nb && lp < &p[P_L2_SIZE]) {
267 if ((*index & (step - 1)) == 0 && *nb >= step) {
268 lp->skip = 0;
269 lp->ptr = leaf;
270 *index += step;
271 *nb -= step;
272 } else {
273 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
275 ++lp;
279 static void phys_page_set(AddressSpaceDispatch *d,
280 hwaddr index, hwaddr nb,
281 uint16_t leaf)
283 /* Wildly overreserve - it doesn't matter much. */
284 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
286 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
289 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
290 * and update our entry so we can skip it and go directly to the destination.
292 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
294 unsigned valid_ptr = P_L2_SIZE;
295 int valid = 0;
296 PhysPageEntry *p;
297 int i;
299 if (lp->ptr == PHYS_MAP_NODE_NIL) {
300 return;
303 p = nodes[lp->ptr];
304 for (i = 0; i < P_L2_SIZE; i++) {
305 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
306 continue;
309 valid_ptr = i;
310 valid++;
311 if (p[i].skip) {
312 phys_page_compact(&p[i], nodes);
316 /* We can only compress if there's only one child. */
317 if (valid != 1) {
318 return;
321 assert(valid_ptr < P_L2_SIZE);
323 /* Don't compress if it won't fit in the # of bits we have. */
324 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
325 return;
328 lp->ptr = p[valid_ptr].ptr;
329 if (!p[valid_ptr].skip) {
330 /* If our only child is a leaf, make this a leaf. */
331 /* By design, we should have made this node a leaf to begin with so we
332 * should never reach here.
333 * But since it's so simple to handle this, let's do it just in case we
334 * change this rule.
336 lp->skip = 0;
337 } else {
338 lp->skip += p[valid_ptr].skip;
342 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
344 if (d->phys_map.skip) {
345 phys_page_compact(&d->phys_map, d->map.nodes);
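/* Illustration: after compaction, a chain of intermediate nodes that each
 * have a single valid child is collapsed by pointing the parent entry at the
 * grandchild and accumulating the skip counts, so phys_page_find() crosses
 * the whole chain in one step instead of one memory access per level.
 */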
349 static inline bool section_covers_addr(const MemoryRegionSection *section,
350 hwaddr addr)
352 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
353 * the section must cover the entire address space.
355 return int128_gethi(section->size) ||
356 range_covers_byte(section->offset_within_address_space,
357 int128_getlo(section->size), addr);
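/* Example: a section with offset_within_address_space == 0x1000 and
 * size == 0x2000 covers addresses 0x1000..0x2fff. A section whose 128-bit
 * size has a non-zero high word is, per the comment above, the single
 * section spanning the whole 2^64 space, so it matches any address.
 */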
360 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
361 Node *nodes, MemoryRegionSection *sections)
363 PhysPageEntry *p;
364 hwaddr index = addr >> TARGET_PAGE_BITS;
365 int i;
367 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
368 if (lp.ptr == PHYS_MAP_NODE_NIL) {
369 return &sections[PHYS_SECTION_UNASSIGNED];
371 p = nodes[lp.ptr];
372 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
375 if (section_covers_addr(&sections[lp.ptr], addr)) {
376 return &sections[lp.ptr];
377 } else {
378 return &sections[PHYS_SECTION_UNASSIGNED];
382 bool memory_region_is_unassigned(MemoryRegion *mr)
384 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
385 && mr != &io_mem_watch;
388 /* Called from RCU critical section */
389 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
390 hwaddr addr,
391 bool resolve_subpage)
393 MemoryRegionSection *section = atomic_read(&d->mru_section);
394 subpage_t *subpage;
395 bool update;
397 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
398 section_covers_addr(section, addr)) {
399 update = false;
400 } else {
401 section = phys_page_find(d->phys_map, addr, d->map.nodes,
402 d->map.sections);
403 update = true;
405 if (resolve_subpage && section->mr->subpage) {
406 subpage = container_of(section->mr, subpage_t, iomem);
407 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
409 if (update) {
410 atomic_set(&d->mru_section, section);
412 return section;
415 /* Called from RCU critical section */
416 static MemoryRegionSection *
417 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
418 hwaddr *plen, bool resolve_subpage)
420 MemoryRegionSection *section;
421 MemoryRegion *mr;
422 Int128 diff;
424 section = address_space_lookup_region(d, addr, resolve_subpage);
425 /* Compute offset within MemoryRegionSection */
426 addr -= section->offset_within_address_space;
428 /* Compute offset within MemoryRegion */
429 *xlat = addr + section->offset_within_region;
431 mr = section->mr;
433 /* MMIO registers can be expected to perform full-width accesses based only
434 * on their address, without considering adjacent registers that could
435 * decode to completely different MemoryRegions. When such registers
436 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
437 * regions overlap wildly. For this reason we cannot clamp the accesses
438 * here.
440 * If the length is small (as is the case for address_space_ldl/stl),
441 * everything works fine. If the incoming length is large, however,
442 * the caller really has to do the clamping through memory_access_size.
444 if (memory_region_is_ram(mr)) {
445 diff = int128_sub(section->size, int128_make64(addr));
446 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
448 return section;
451 /* Called from RCU critical section */
452 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
453 hwaddr *xlat, hwaddr *plen,
454 bool is_write)
456 IOMMUTLBEntry iotlb;
457 MemoryRegionSection *section;
458 MemoryRegion *mr;
460 for (;;) {
461 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
462 section = address_space_translate_internal(d, addr, &addr, plen, true);
463 mr = section->mr;
465 if (!mr->iommu_ops) {
466 break;
469 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
470 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
471 | (addr & iotlb.addr_mask));
472 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
473 if (!(iotlb.perm & (1 << is_write))) {
474 mr = &io_mem_unassigned;
475 break;
478 as = iotlb.target_as;
481 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
482 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
483 *plen = MIN(page, *plen);
486 *xlat = addr;
487 return mr;
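/* Worked example with hypothetical IOMMU values: for iotlb.addr_mask == 0xfff
 * (a 4 KiB IOMMU page), iotlb.translated_addr == 0x80000000 and an incoming
 * addr of 0x1234, the loop produces addr == 0x80000234 and clamps *plen to
 * at most (addr | 0xfff) - addr + 1 == 0xdcc bytes, i.e. to the end of that
 * IOMMU page.
 */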
490 /* Called from RCU critical section */
491 MemoryRegionSection *
492 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
493 hwaddr *xlat, hwaddr *plen)
495 MemoryRegionSection *section;
496 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
498 section = address_space_translate_internal(d, addr, xlat, plen, false);
500 assert(!section->mr->iommu_ops);
501 return section;
503 #endif
505 #if !defined(CONFIG_USER_ONLY)
507 static int cpu_common_post_load(void *opaque, int version_id)
509 CPUState *cpu = opaque;
511 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
512 version_id is increased. */
513 cpu->interrupt_request &= ~0x01;
514 tlb_flush(cpu, 1);
516 return 0;
519 static int cpu_common_pre_load(void *opaque)
521 CPUState *cpu = opaque;
523 cpu->exception_index = -1;
525 return 0;
528 static bool cpu_common_exception_index_needed(void *opaque)
530 CPUState *cpu = opaque;
532 return tcg_enabled() && cpu->exception_index != -1;
535 static const VMStateDescription vmstate_cpu_common_exception_index = {
536 .name = "cpu_common/exception_index",
537 .version_id = 1,
538 .minimum_version_id = 1,
539 .needed = cpu_common_exception_index_needed,
540 .fields = (VMStateField[]) {
541 VMSTATE_INT32(exception_index, CPUState),
542 VMSTATE_END_OF_LIST()
546 static bool cpu_common_crash_occurred_needed(void *opaque)
548 CPUState *cpu = opaque;
550 return cpu->crash_occurred;
553 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
554 .name = "cpu_common/crash_occurred",
555 .version_id = 1,
556 .minimum_version_id = 1,
557 .needed = cpu_common_crash_occurred_needed,
558 .fields = (VMStateField[]) {
559 VMSTATE_BOOL(crash_occurred, CPUState),
560 VMSTATE_END_OF_LIST()
564 const VMStateDescription vmstate_cpu_common = {
565 .name = "cpu_common",
566 .version_id = 1,
567 .minimum_version_id = 1,
568 .pre_load = cpu_common_pre_load,
569 .post_load = cpu_common_post_load,
570 .fields = (VMStateField[]) {
571 VMSTATE_UINT32(halted, CPUState),
572 VMSTATE_UINT32(interrupt_request, CPUState),
573 VMSTATE_END_OF_LIST()
575 .subsections = (const VMStateDescription*[]) {
576 &vmstate_cpu_common_exception_index,
577 &vmstate_cpu_common_crash_occurred,
578 NULL
582 #endif
584 CPUState *qemu_get_cpu(int index)
586 CPUState *cpu;
588 CPU_FOREACH(cpu) {
589 if (cpu->cpu_index == index) {
590 return cpu;
594 return NULL;
597 #if !defined(CONFIG_USER_ONLY)
598 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
600 CPUAddressSpace *newas;
602 /* Target code should have set num_ases before calling us */
603 assert(asidx < cpu->num_ases);
605 if (asidx == 0) {
606 /* address space 0 gets the convenience alias */
607 cpu->as = as;
610 /* KVM cannot currently support multiple address spaces. */
611 assert(asidx == 0 || !kvm_enabled());
613 if (!cpu->cpu_ases) {
614 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
617 newas = &cpu->cpu_ases[asidx];
618 newas->cpu = cpu;
619 newas->as = as;
620 if (tcg_enabled()) {
621 newas->tcg_as_listener.commit = tcg_commit;
622 memory_listener_register(&newas->tcg_as_listener, as);
626 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
628 /* Return the AddressSpace corresponding to the specified index */
629 return cpu->cpu_ases[asidx].as;
631 #endif
633 void cpu_exec_unrealizefn(CPUState *cpu)
635 CPUClass *cc = CPU_GET_CLASS(cpu);
637 cpu_list_remove(cpu);
639 if (cc->vmsd != NULL) {
640 vmstate_unregister(NULL, cc->vmsd, cpu);
642 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
643 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
647 void cpu_exec_initfn(CPUState *cpu)
649 cpu->as = NULL;
650 cpu->num_ases = 0;
652 #ifndef CONFIG_USER_ONLY
653 cpu->thread_id = qemu_get_thread_id();
655 /* This is a softmmu CPU object, so create a property for it
656 * so users can wire up its memory. (This can't go in qom/cpu.c
657 * because that file is compiled only once for both user-mode
658 * and system builds.) The default if no link is set up is to use
659 * the system address space.
661 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
662 (Object **)&cpu->memory,
663 qdev_prop_allow_set_link_before_realize,
664 OBJ_PROP_LINK_UNREF_ON_RELEASE,
665 &error_abort);
666 cpu->memory = system_memory;
667 object_ref(OBJECT(cpu->memory));
668 #endif
671 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
673 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
675 cpu_list_add(cpu);
677 #ifndef CONFIG_USER_ONLY
678 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
679 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
681 if (cc->vmsd != NULL) {
682 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
684 #endif
687 #if defined(CONFIG_USER_ONLY)
688 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
690 mmap_lock();
691 tb_lock();
692 tb_invalidate_phys_page_range(pc, pc + 1, 0);
693 tb_unlock();
694 mmap_unlock();
696 #else
697 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
699 MemTxAttrs attrs;
700 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
701 int asidx = cpu_asidx_from_attrs(cpu, attrs);
702 if (phys != -1) {
703 /* Locks grabbed by tb_invalidate_phys_addr */
704 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
705 phys | (pc & ~TARGET_PAGE_MASK));
708 #endif
710 #if defined(CONFIG_USER_ONLY)
711 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
716 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
717 int flags)
719 return -ENOSYS;
722 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
726 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
727 int flags, CPUWatchpoint **watchpoint)
729 return -ENOSYS;
731 #else
732 /* Add a watchpoint. */
733 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
734 int flags, CPUWatchpoint **watchpoint)
736 CPUWatchpoint *wp;
738 /* forbid ranges which are empty or run off the end of the address space */
739 if (len == 0 || (addr + len - 1) < addr) {
740 error_report("tried to set invalid watchpoint at %"
741 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
742 return -EINVAL;
744 wp = g_malloc(sizeof(*wp));
746 wp->vaddr = addr;
747 wp->len = len;
748 wp->flags = flags;
750 /* keep all GDB-injected watchpoints in front */
751 if (flags & BP_GDB) {
752 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
753 } else {
754 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
757 tlb_flush_page(cpu, addr);
759 if (watchpoint)
760 *watchpoint = wp;
761 return 0;
764 /* Remove a specific watchpoint. */
765 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
766 int flags)
768 CPUWatchpoint *wp;
770 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
771 if (addr == wp->vaddr && len == wp->len
772 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
773 cpu_watchpoint_remove_by_ref(cpu, wp);
774 return 0;
777 return -ENOENT;
780 /* Remove a specific watchpoint by reference. */
781 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
783 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
785 tlb_flush_page(cpu, watchpoint->vaddr);
787 g_free(watchpoint);
790 /* Remove all matching watchpoints. */
791 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
793 CPUWatchpoint *wp, *next;
795 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
796 if (wp->flags & mask) {
797 cpu_watchpoint_remove_by_ref(cpu, wp);
802 /* Return true if this watchpoint address matches the specified
803 * access (ie the address range covered by the watchpoint overlaps
804 * partially or completely with the address range covered by the
805 * access).
807 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
808 vaddr addr,
809 vaddr len)
811 /* We know the lengths are non-zero, but a little caution is
812 * required to avoid errors in the case where the range ends
813 * exactly at the top of the address space and so addr + len
814 * wraps round to zero.
816 vaddr wpend = wp->vaddr + wp->len - 1;
817 vaddr addrend = addr + len - 1;
819 return !(addr > wpend || wp->vaddr > addrend);
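/* Example: a watchpoint at vaddr 0x1000 with len 4 covers 0x1000..0x1003
 * (wpend == 0x1003); an access at addr 0x1002 with len 2 covers
 * 0x1002..0x1003 (addrend == 0x1003). Neither range starts past the other's
 * end, so the ranges overlap and the function returns true.
 */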
822 #endif
824 /* Add a breakpoint. */
825 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
826 CPUBreakpoint **breakpoint)
828 CPUBreakpoint *bp;
830 bp = g_malloc(sizeof(*bp));
832 bp->pc = pc;
833 bp->flags = flags;
835 /* keep all GDB-injected breakpoints in front */
836 if (flags & BP_GDB) {
837 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
838 } else {
839 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
842 breakpoint_invalidate(cpu, pc);
844 if (breakpoint) {
845 *breakpoint = bp;
847 return 0;
850 /* Remove a specific breakpoint. */
851 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
853 CPUBreakpoint *bp;
855 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
856 if (bp->pc == pc && bp->flags == flags) {
857 cpu_breakpoint_remove_by_ref(cpu, bp);
858 return 0;
861 return -ENOENT;
864 /* Remove a specific breakpoint by reference. */
865 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
867 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
869 breakpoint_invalidate(cpu, breakpoint->pc);
871 g_free(breakpoint);
874 /* Remove all matching breakpoints. */
875 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
877 CPUBreakpoint *bp, *next;
879 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
880 if (bp->flags & mask) {
881 cpu_breakpoint_remove_by_ref(cpu, bp);
886 /* enable or disable single step mode. EXCP_DEBUG is returned by the
887 CPU loop after each instruction */
888 void cpu_single_step(CPUState *cpu, int enabled)
890 if (cpu->singlestep_enabled != enabled) {
891 cpu->singlestep_enabled = enabled;
892 if (kvm_enabled()) {
893 kvm_update_guest_debug(cpu, 0);
894 } else {
895 /* must flush all the translated code to avoid inconsistencies */
896 /* XXX: only flush what is necessary */
897 tb_flush(cpu);
902 void cpu_abort(CPUState *cpu, const char *fmt, ...)
904 va_list ap;
905 va_list ap2;
907 va_start(ap, fmt);
908 va_copy(ap2, ap);
909 fprintf(stderr, "qemu: fatal: ");
910 vfprintf(stderr, fmt, ap);
911 fprintf(stderr, "\n");
912 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
913 if (qemu_log_separate()) {
914 qemu_log_lock();
915 qemu_log("qemu: fatal: ");
916 qemu_log_vprintf(fmt, ap2);
917 qemu_log("\n");
918 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
919 qemu_log_flush();
920 qemu_log_unlock();
921 qemu_log_close();
923 va_end(ap2);
924 va_end(ap);
925 replay_finish();
926 #if defined(CONFIG_USER_ONLY)
928 struct sigaction act;
929 sigfillset(&act.sa_mask);
930 act.sa_handler = SIG_DFL;
931 sigaction(SIGABRT, &act, NULL);
933 #endif
934 abort();
937 #if !defined(CONFIG_USER_ONLY)
938 /* Called from RCU critical section */
939 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
941 RAMBlock *block;
943 block = atomic_rcu_read(&ram_list.mru_block);
944 if (block && addr - block->offset < block->max_length) {
945 return block;
947 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
948 if (addr - block->offset < block->max_length) {
949 goto found;
953 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
954 abort();
956 found:
957 /* It is safe to write mru_block outside the iothread lock. This
958 * is what happens:
960 * mru_block = xxx
961 * rcu_read_unlock()
962 * xxx removed from list
963 * rcu_read_lock()
964 * read mru_block
965 * mru_block = NULL;
966 * call_rcu(reclaim_ramblock, xxx);
967 * rcu_read_unlock()
969 * atomic_rcu_set is not needed here. The block was already published
970 * when it was placed into the list. Here we're just making an extra
971 * copy of the pointer.
973 ram_list.mru_block = block;
974 return block;
977 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
979 CPUState *cpu;
980 ram_addr_t start1;
981 RAMBlock *block;
982 ram_addr_t end;
984 end = TARGET_PAGE_ALIGN(start + length);
985 start &= TARGET_PAGE_MASK;
987 rcu_read_lock();
988 block = qemu_get_ram_block(start);
989 assert(block == qemu_get_ram_block(end - 1));
990 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
991 CPU_FOREACH(cpu) {
992 tlb_reset_dirty(cpu, start1, length);
994 rcu_read_unlock();
997 /* Note: start and end must be within the same ram block. */
998 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
999 ram_addr_t length,
1000 unsigned client)
1002 DirtyMemoryBlocks *blocks;
1003 unsigned long end, page;
1004 bool dirty = false;
1006 if (length == 0) {
1007 return false;
1010 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1011 page = start >> TARGET_PAGE_BITS;
1013 rcu_read_lock();
1015 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1017 while (page < end) {
1018 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1019 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1020 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1022 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1023 offset, num);
1024 page += num;
1027 rcu_read_unlock();
1029 if (dirty && tcg_enabled()) {
1030 tlb_reset_dirty_range_all(start, length);
1033 return dirty;
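/* Illustration with a hypothetical DIRTY_MEMORY_BLOCK_SIZE of 8 pages (the
 * real constant is much larger): clearing pages 5..10 takes two iterations,
 * first blocks[0] bits 5..7 (idx 0, offset 5, num 3), then blocks[1] bits
 * 0..2 (idx 1, offset 0, num 3), so a single bitmap_test_and_clear_atomic()
 * call never spans two blocks.
 */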
1036 /* Called from RCU critical section */
1037 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1038 MemoryRegionSection *section,
1039 target_ulong vaddr,
1040 hwaddr paddr, hwaddr xlat,
1041 int prot,
1042 target_ulong *address)
1044 hwaddr iotlb;
1045 CPUWatchpoint *wp;
1047 if (memory_region_is_ram(section->mr)) {
1048 /* Normal RAM. */
1049 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1050 if (!section->readonly) {
1051 iotlb |= PHYS_SECTION_NOTDIRTY;
1052 } else {
1053 iotlb |= PHYS_SECTION_ROM;
1055 } else {
1056 AddressSpaceDispatch *d;
1058 d = atomic_rcu_read(&section->address_space->dispatch);
1059 iotlb = section - d->map.sections;
1060 iotlb += xlat;
1063 /* Make accesses to pages with watchpoints go via the
1064 watchpoint trap routines. */
1065 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1066 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1067 /* Avoid trapping reads of pages with a write breakpoint. */
1068 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1069 iotlb = PHYS_SECTION_WATCH + paddr;
1070 *address |= TLB_MMIO;
1071 break;
1076 return iotlb;
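/* The returned value encodes the access target: for RAM it is the page's
 * ram_addr ORed with PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM in the low
 * bits; for MMIO it is the index of the MemoryRegionSection in the dispatch
 * map plus the offset of the access within the region. Accesses to pages
 * with watchpoints are redirected via PHYS_SECTION_WATCH.
 */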
1078 #endif /* defined(CONFIG_USER_ONLY) */
1080 #if !defined(CONFIG_USER_ONLY)
1082 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1083 uint16_t section);
1084 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1086 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1087 qemu_anon_ram_alloc;
1090 * Set a custom physical guest memory allocator.
1091 * Accelerators with unusual needs may need this. Hopefully, we can
1092 * get rid of it eventually.
1094 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1096 phys_mem_alloc = alloc;
1099 static uint16_t phys_section_add(PhysPageMap *map,
1100 MemoryRegionSection *section)
1102 /* The physical section number is ORed with a page-aligned
1103 * pointer to produce the iotlb entries. Thus it should
1104 * never overflow into the page-aligned value.
1106 assert(map->sections_nb < TARGET_PAGE_SIZE);
1108 if (map->sections_nb == map->sections_nb_alloc) {
1109 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1110 map->sections = g_renew(MemoryRegionSection, map->sections,
1111 map->sections_nb_alloc);
1113 map->sections[map->sections_nb] = *section;
1114 memory_region_ref(section->mr);
1115 return map->sections_nb++;
1118 static void phys_section_destroy(MemoryRegion *mr)
1120 bool have_sub_page = mr->subpage;
1122 memory_region_unref(mr);
1124 if (have_sub_page) {
1125 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1126 object_unref(OBJECT(&subpage->iomem));
1127 g_free(subpage);
1131 static void phys_sections_free(PhysPageMap *map)
1133 while (map->sections_nb > 0) {
1134 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1135 phys_section_destroy(section->mr);
1137 g_free(map->sections);
1138 g_free(map->nodes);
1141 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1143 subpage_t *subpage;
1144 hwaddr base = section->offset_within_address_space
1145 & TARGET_PAGE_MASK;
1146 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1147 d->map.nodes, d->map.sections);
1148 MemoryRegionSection subsection = {
1149 .offset_within_address_space = base,
1150 .size = int128_make64(TARGET_PAGE_SIZE),
1152 hwaddr start, end;
1154 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1156 if (!(existing->mr->subpage)) {
1157 subpage = subpage_init(d->as, base);
1158 subsection.address_space = d->as;
1159 subsection.mr = &subpage->iomem;
1160 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1161 phys_section_add(&d->map, &subsection));
1162 } else {
1163 subpage = container_of(existing->mr, subpage_t, iomem);
1165 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1166 end = start + int128_get64(section->size) - 1;
1167 subpage_register(subpage, start, end,
1168 phys_section_add(&d->map, section));
1172 static void register_multipage(AddressSpaceDispatch *d,
1173 MemoryRegionSection *section)
1175 hwaddr start_addr = section->offset_within_address_space;
1176 uint16_t section_index = phys_section_add(&d->map, section);
1177 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1178 TARGET_PAGE_BITS));
1180 assert(num_pages);
1181 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1184 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1186 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1187 AddressSpaceDispatch *d = as->next_dispatch;
1188 MemoryRegionSection now = *section, remain = *section;
1189 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1191 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1192 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1193 - now.offset_within_address_space;
1195 now.size = int128_min(int128_make64(left), now.size);
1196 register_subpage(d, &now);
1197 } else {
1198 now.size = int128_zero();
1200 while (int128_ne(remain.size, now.size)) {
1201 remain.size = int128_sub(remain.size, now.size);
1202 remain.offset_within_address_space += int128_get64(now.size);
1203 remain.offset_within_region += int128_get64(now.size);
1204 now = remain;
1205 if (int128_lt(remain.size, page_size)) {
1206 register_subpage(d, &now);
1207 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1208 now.size = page_size;
1209 register_subpage(d, &now);
1210 } else {
1211 now.size = int128_and(now.size, int128_neg(page_size));
1212 register_multipage(d, &now);
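/* Example, assuming 4 KiB target pages: a section starting at 0x800 with
 * size 0x2800 (covering 0x800..0x2fff) is registered as a head subpage for
 * 0x800..0xfff followed by one register_multipage() call for the aligned
 * remainder 0x1000..0x2fff; a section that ended short of a page boundary
 * would also get its own trailing subpage.
 */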
1217 void qemu_flush_coalesced_mmio_buffer(void)
1219 if (kvm_enabled())
1220 kvm_flush_coalesced_mmio_buffer();
1223 void qemu_mutex_lock_ramlist(void)
1225 qemu_mutex_lock(&ram_list.mutex);
1228 void qemu_mutex_unlock_ramlist(void)
1230 qemu_mutex_unlock(&ram_list.mutex);
1233 #ifdef __linux__
1234 static void *file_ram_alloc(RAMBlock *block,
1235 ram_addr_t memory,
1236 const char *path,
1237 Error **errp)
1239 bool unlink_on_error = false;
1240 char *filename;
1241 char *sanitized_name;
1242 char *c;
1243 void *area = MAP_FAILED;
1244 int fd = -1;
1246 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1247 error_setg(errp,
1248 "host lacks kvm mmu notifiers, -mem-path unsupported");
1249 return NULL;
1252 for (;;) {
1253 fd = open(path, O_RDWR);
1254 if (fd >= 0) {
1255 /* @path names an existing file, use it */
1256 break;
1258 if (errno == ENOENT) {
1259 /* @path names a file that doesn't exist, create it */
1260 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1261 if (fd >= 0) {
1262 unlink_on_error = true;
1263 break;
1265 } else if (errno == EISDIR) {
1266 /* @path names a directory, create a file there */
1267 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1268 sanitized_name = g_strdup(memory_region_name(block->mr));
1269 for (c = sanitized_name; *c != '\0'; c++) {
1270 if (*c == '/') {
1271 *c = '_';
1275 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1276 sanitized_name);
1277 g_free(sanitized_name);
1279 fd = mkstemp(filename);
1280 if (fd >= 0) {
1281 unlink(filename);
1282 g_free(filename);
1283 break;
1285 g_free(filename);
1287 if (errno != EEXIST && errno != EINTR) {
1288 error_setg_errno(errp, errno,
1289 "can't open backing store %s for guest RAM",
1290 path);
1291 goto error;
1294 * Try again on EINTR and EEXIST. The latter happens when
1295 * something else creates the file between our two open().
1299 block->page_size = qemu_fd_getpagesize(fd);
1300 block->mr->align = block->page_size;
1301 #if defined(__s390x__)
1302 if (kvm_enabled()) {
1303 block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1305 #endif
1307 if (memory < block->page_size) {
1308 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1309 "or larger than page size 0x%zx",
1310 memory, block->page_size);
1311 goto error;
1314 memory = ROUND_UP(memory, block->page_size);
1317 * ftruncate is not supported by hugetlbfs in older
1318 * hosts, so don't bother bailing out on errors.
1319 * If anything goes wrong with it under other filesystems,
1320 * mmap will fail.
1322 if (ftruncate(fd, memory)) {
1323 perror("ftruncate");
1326 area = qemu_ram_mmap(fd, memory, block->mr->align,
1327 block->flags & RAM_SHARED);
1328 if (area == MAP_FAILED) {
1329 error_setg_errno(errp, errno,
1330 "unable to map backing store for guest RAM");
1331 goto error;
1334 if (mem_prealloc) {
1335 os_mem_prealloc(fd, area, memory, errp);
1336 if (errp && *errp) {
1337 goto error;
1341 block->fd = fd;
1342 return area;
1344 error:
1345 if (area != MAP_FAILED) {
1346 qemu_ram_munmap(area, memory);
1348 if (unlink_on_error) {
1349 unlink(path);
1351 if (fd != -1) {
1352 close(fd);
1354 return NULL;
1356 #endif
1358 /* Called with the ramlist lock held. */
1359 static ram_addr_t find_ram_offset(ram_addr_t size)
1361 RAMBlock *block, *next_block;
1362 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1364 assert(size != 0); /* it would hand out same offset multiple times */
1366 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1367 return 0;
1370 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1371 ram_addr_t end, next = RAM_ADDR_MAX;
1373 end = block->offset + block->max_length;
1375 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1376 if (next_block->offset >= end) {
1377 next = MIN(next, next_block->offset);
1380 if (next - end >= size && next - end < mingap) {
1381 offset = end;
1382 mingap = next - end;
1386 if (offset == RAM_ADDR_MAX) {
1387 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1388 (uint64_t)size);
1389 abort();
1392 return offset;
1395 ram_addr_t last_ram_offset(void)
1397 RAMBlock *block;
1398 ram_addr_t last = 0;
1400 rcu_read_lock();
1401 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1402 last = MAX(last, block->offset + block->max_length);
1404 rcu_read_unlock();
1405 return last;
1408 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1410 int ret;
1412 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1413 if (!machine_dump_guest_core(current_machine)) {
1414 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1415 if (ret) {
1416 perror("qemu_madvise");
1417 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1418 "but dump_guest_core=off specified\n");
1423 const char *qemu_ram_get_idstr(RAMBlock *rb)
1425 return rb->idstr;
1428 /* Called with iothread lock held. */
1429 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1431 RAMBlock *block;
1433 assert(new_block);
1434 assert(!new_block->idstr[0]);
1436 if (dev) {
1437 char *id = qdev_get_dev_path(dev);
1438 if (id) {
1439 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1440 g_free(id);
1443 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1445 rcu_read_lock();
1446 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1447 if (block != new_block &&
1448 !strcmp(block->idstr, new_block->idstr)) {
1449 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1450 new_block->idstr);
1451 abort();
1454 rcu_read_unlock();
1457 /* Called with iothread lock held. */
1458 void qemu_ram_unset_idstr(RAMBlock *block)
1460 /* FIXME: arch_init.c assumes that this is not called throughout
1461 * migration. Ignore the problem since hot-unplug during migration
1462 * does not work anyway.
1464 if (block) {
1465 memset(block->idstr, 0, sizeof(block->idstr));
1469 size_t qemu_ram_pagesize(RAMBlock *rb)
1471 return rb->page_size;
1474 static int memory_try_enable_merging(void *addr, size_t len)
1476 if (!machine_mem_merge(current_machine)) {
1477 /* disabled by the user */
1478 return 0;
1481 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1484 /* Only legal before guest might have detected the memory size: e.g. on
1485 * incoming migration, or right after reset.
1487 * As the memory core doesn't know how the memory is accessed, it is up to the
1488 * resize callback to update device state and/or add assertions to detect
1489 * misuse, if necessary.
1491 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1493 assert(block);
1495 newsize = HOST_PAGE_ALIGN(newsize);
1497 if (block->used_length == newsize) {
1498 return 0;
1501 if (!(block->flags & RAM_RESIZEABLE)) {
1502 error_setg_errno(errp, EINVAL,
1503 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1504 " in != 0x" RAM_ADDR_FMT, block->idstr,
1505 newsize, block->used_length);
1506 return -EINVAL;
1509 if (block->max_length < newsize) {
1510 error_setg_errno(errp, EINVAL,
1511 "Length too large: %s: 0x" RAM_ADDR_FMT
1512 " > 0x" RAM_ADDR_FMT, block->idstr,
1513 newsize, block->max_length);
1514 return -EINVAL;
1517 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1518 block->used_length = newsize;
1519 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1520 DIRTY_CLIENTS_ALL);
1521 memory_region_set_size(block->mr, newsize);
1522 if (block->resized) {
1523 block->resized(block->idstr, newsize, block->host);
1525 return 0;
1528 /* Called with ram_list.mutex held */
1529 static void dirty_memory_extend(ram_addr_t old_ram_size,
1530 ram_addr_t new_ram_size)
1532 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1533 DIRTY_MEMORY_BLOCK_SIZE);
1534 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1535 DIRTY_MEMORY_BLOCK_SIZE);
1536 int i;
1538 /* Only need to extend if block count increased */
1539 if (new_num_blocks <= old_num_blocks) {
1540 return;
1543 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1544 DirtyMemoryBlocks *old_blocks;
1545 DirtyMemoryBlocks *new_blocks;
1546 int j;
1548 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1549 new_blocks = g_malloc(sizeof(*new_blocks) +
1550 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1552 if (old_num_blocks) {
1553 memcpy(new_blocks->blocks, old_blocks->blocks,
1554 old_num_blocks * sizeof(old_blocks->blocks[0]));
1557 for (j = old_num_blocks; j < new_num_blocks; j++) {
1558 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1561 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1563 if (old_blocks) {
1564 g_free_rcu(old_blocks, rcu);
1569 static void ram_block_add(RAMBlock *new_block, Error **errp)
1571 RAMBlock *block;
1572 RAMBlock *last_block = NULL;
1573 ram_addr_t old_ram_size, new_ram_size;
1574 Error *err = NULL;
1576 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1578 qemu_mutex_lock_ramlist();
1579 new_block->offset = find_ram_offset(new_block->max_length);
1581 if (!new_block->host) {
1582 if (xen_enabled()) {
1583 xen_ram_alloc(new_block->offset, new_block->max_length,
1584 new_block->mr, &err);
1585 if (err) {
1586 error_propagate(errp, err);
1587 qemu_mutex_unlock_ramlist();
1588 return;
1590 } else {
1591 new_block->host = phys_mem_alloc(new_block->max_length,
1592 &new_block->mr->align);
1593 if (!new_block->host) {
1594 error_setg_errno(errp, errno,
1595 "cannot set up guest memory '%s'",
1596 memory_region_name(new_block->mr));
1597 qemu_mutex_unlock_ramlist();
1598 return;
1600 memory_try_enable_merging(new_block->host, new_block->max_length);
1604 new_ram_size = MAX(old_ram_size,
1605 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1606 if (new_ram_size > old_ram_size) {
1607 migration_bitmap_extend(old_ram_size, new_ram_size);
1608 dirty_memory_extend(old_ram_size, new_ram_size);
1610 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1611 * QLIST (which has an RCU-friendly variant) does not have insertion at
1612 * tail, so save the last element in last_block.
1614 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1615 last_block = block;
1616 if (block->max_length < new_block->max_length) {
1617 break;
1620 if (block) {
1621 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1622 } else if (last_block) {
1623 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1624 } else { /* list is empty */
1625 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1627 ram_list.mru_block = NULL;
1629 /* Write list before version */
1630 smp_wmb();
1631 ram_list.version++;
1632 qemu_mutex_unlock_ramlist();
1634 cpu_physical_memory_set_dirty_range(new_block->offset,
1635 new_block->used_length,
1636 DIRTY_CLIENTS_ALL);
1638 if (new_block->host) {
1639 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1640 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1641 /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1642 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1646 #ifdef __linux__
1647 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1648 bool share, const char *mem_path,
1649 Error **errp)
1651 RAMBlock *new_block;
1652 Error *local_err = NULL;
1654 if (xen_enabled()) {
1655 error_setg(errp, "-mem-path not supported with Xen");
1656 return NULL;
1659 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1661 * file_ram_alloc() needs to allocate just like
1662 * phys_mem_alloc, but we haven't bothered to provide
1663 * a hook there.
1665 error_setg(errp,
1666 "-mem-path not supported with this accelerator");
1667 return NULL;
1670 size = HOST_PAGE_ALIGN(size);
1671 new_block = g_malloc0(sizeof(*new_block));
1672 new_block->mr = mr;
1673 new_block->used_length = size;
1674 new_block->max_length = size;
1675 new_block->flags = share ? RAM_SHARED : 0;
1676 new_block->host = file_ram_alloc(new_block, size,
1677 mem_path, errp);
1678 if (!new_block->host) {
1679 g_free(new_block);
1680 return NULL;
1683 ram_block_add(new_block, &local_err);
1684 if (local_err) {
1685 g_free(new_block);
1686 error_propagate(errp, local_err);
1687 return NULL;
1689 return new_block;
1691 #endif
1693 static
1694 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1695 void (*resized)(const char*,
1696 uint64_t length,
1697 void *host),
1698 void *host, bool resizeable,
1699 MemoryRegion *mr, Error **errp)
1701 RAMBlock *new_block;
1702 Error *local_err = NULL;
1704 size = HOST_PAGE_ALIGN(size);
1705 max_size = HOST_PAGE_ALIGN(max_size);
1706 new_block = g_malloc0(sizeof(*new_block));
1707 new_block->mr = mr;
1708 new_block->resized = resized;
1709 new_block->used_length = size;
1710 new_block->max_length = max_size;
1711 assert(max_size >= size);
1712 new_block->fd = -1;
1713 new_block->page_size = getpagesize();
1714 new_block->host = host;
1715 if (host) {
1716 new_block->flags |= RAM_PREALLOC;
1718 if (resizeable) {
1719 new_block->flags |= RAM_RESIZEABLE;
1721 ram_block_add(new_block, &local_err);
1722 if (local_err) {
1723 g_free(new_block);
1724 error_propagate(errp, local_err);
1725 return NULL;
1727 return new_block;
1730 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1731 MemoryRegion *mr, Error **errp)
1733 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1736 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1738 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1741 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1742 void (*resized)(const char*,
1743 uint64_t length,
1744 void *host),
1745 MemoryRegion *mr, Error **errp)
1747 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1750 static void reclaim_ramblock(RAMBlock *block)
1752 if (block->flags & RAM_PREALLOC) {
1754 } else if (xen_enabled()) {
1755 xen_invalidate_map_cache_entry(block->host);
1756 #ifndef _WIN32
1757 } else if (block->fd >= 0) {
1758 qemu_ram_munmap(block->host, block->max_length);
1759 close(block->fd);
1760 #endif
1761 } else {
1762 qemu_anon_ram_free(block->host, block->max_length);
1764 g_free(block);
1767 void qemu_ram_free(RAMBlock *block)
1769 if (!block) {
1770 return;
1773 qemu_mutex_lock_ramlist();
1774 QLIST_REMOVE_RCU(block, next);
1775 ram_list.mru_block = NULL;
1776 /* Write list before version */
1777 smp_wmb();
1778 ram_list.version++;
1779 call_rcu(block, reclaim_ramblock, rcu);
1780 qemu_mutex_unlock_ramlist();
1783 #ifndef _WIN32
1784 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1786 RAMBlock *block;
1787 ram_addr_t offset;
1788 int flags;
1789 void *area, *vaddr;
1791 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1792 offset = addr - block->offset;
1793 if (offset < block->max_length) {
1794 vaddr = ramblock_ptr(block, offset);
1795 if (block->flags & RAM_PREALLOC) {
1797 } else if (xen_enabled()) {
1798 abort();
1799 } else {
1800 flags = MAP_FIXED;
1801 if (block->fd >= 0) {
1802 flags |= (block->flags & RAM_SHARED ?
1803 MAP_SHARED : MAP_PRIVATE);
1804 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1805 flags, block->fd, offset);
1806 } else {
1808 * Remap needs to match alloc. Accelerators that
1809 * set phys_mem_alloc never remap. If they did,
1810 * we'd need a remap hook here.
1812 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1814 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1815 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1816 flags, -1, 0);
1818 if (area != vaddr) {
1819 fprintf(stderr, "Could not remap addr: "
1820 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1821 length, addr);
1822 exit(1);
1824 memory_try_enable_merging(vaddr, length);
1825 qemu_ram_setup_dump(vaddr, length);
1830 #endif /* !_WIN32 */
1832 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1833 * This should not be used for general purpose DMA. Use address_space_map
1834 * or address_space_rw instead. For local memory (e.g. video ram) that the
1835 * device owns, use memory_region_get_ram_ptr.
1837 * Called within RCU critical section.
1839 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1841 RAMBlock *block = ram_block;
1843 if (block == NULL) {
1844 block = qemu_get_ram_block(addr);
1845 addr -= block->offset;
1848 if (xen_enabled() && block->host == NULL) {
1849 /* We need to check if the requested address is in the RAM
1850 * because we don't want to map the entire memory in QEMU.
1851 * In that case just map until the end of the page.
1853 if (block->offset == 0) {
1854 return xen_map_cache(addr, 0, 0);
1857 block->host = xen_map_cache(block->offset, block->max_length, 1);
1859 return ramblock_ptr(block, addr);
1862 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1863 * but takes a size argument.
1865 * Called within RCU critical section.
1867 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1868 hwaddr *size)
1870 RAMBlock *block = ram_block;
1871 if (*size == 0) {
1872 return NULL;
1875 if (block == NULL) {
1876 block = qemu_get_ram_block(addr);
1877 addr -= block->offset;
1879 *size = MIN(*size, block->max_length - addr);
1881 if (xen_enabled() && block->host == NULL) {
1882 /* We need to check if the requested address is in the RAM
1883 * because we don't want to map the entire memory in QEMU.
1884 * In that case just map the requested area.
1886 if (block->offset == 0) {
1887 return xen_map_cache(addr, *size, 1);
1890 block->host = xen_map_cache(block->offset, block->max_length, 1);
1893 return ramblock_ptr(block, addr);
1897 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1898 * in that RAMBlock.
1900 * ptr: Host pointer to look up
1901 * round_offset: If true round the result offset down to a page boundary
1903 * *offset: set to result offset within the RAMBlock
1905 * Returns: RAMBlock (or NULL if not found)
1907 * By the time this function returns, the returned pointer is not protected
1908 * by RCU anymore. If the caller is not within an RCU critical section and
1909 * does not hold the iothread lock, it must have other means of protecting the
1910 * pointer, such as a reference to the region that includes the incoming
1911 * ram_addr_t.
1913 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1914 ram_addr_t *offset)
1916 RAMBlock *block;
1917 uint8_t *host = ptr;
1919 if (xen_enabled()) {
1920 ram_addr_t ram_addr;
1921 rcu_read_lock();
1922 ram_addr = xen_ram_addr_from_mapcache(ptr);
1923 block = qemu_get_ram_block(ram_addr);
1924 if (block) {
1925 *offset = ram_addr - block->offset;
1927 rcu_read_unlock();
1928 return block;
1931 rcu_read_lock();
1932 block = atomic_rcu_read(&ram_list.mru_block);
1933 if (block && block->host && host - block->host < block->max_length) {
1934 goto found;
1937 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1938 /* This can happen when the block is not mapped. */
1939 if (block->host == NULL) {
1940 continue;
1942 if (host - block->host < block->max_length) {
1943 goto found;
1947 rcu_read_unlock();
1948 return NULL;
1950 found:
1951 *offset = (host - block->host);
1952 if (round_offset) {
1953 *offset &= TARGET_PAGE_MASK;
1955 rcu_read_unlock();
1956 return block;
1960 * Finds the named RAMBlock
1962 * name: The name of RAMBlock to find
1964 * Returns: RAMBlock (or NULL if not found)
1966 RAMBlock *qemu_ram_block_by_name(const char *name)
1968 RAMBlock *block;
1970 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1971 if (!strcmp(name, block->idstr)) {
1972 return block;
1976 return NULL;
1979 /* Some of the softmmu routines need to translate from a host pointer
1980 (typically a TLB entry) back to a ram offset. */
1981 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1983 RAMBlock *block;
1984 ram_addr_t offset;
1986 block = qemu_ram_block_from_host(ptr, false, &offset);
1987 if (!block) {
1988 return RAM_ADDR_INVALID;
1991 return block->offset + offset;
1994 /* Called within RCU critical section. */
1995 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1996 uint64_t val, unsigned size)
1998 bool locked = false;
2000 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2001 locked = true;
2002 tb_lock();
2003 tb_invalidate_phys_page_fast(ram_addr, size);
2005 switch (size) {
2006 case 1:
2007 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2008 break;
2009 case 2:
2010 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2011 break;
2012 case 4:
2013 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2014 break;
2015 default:
2016 abort();
2019 if (locked) {
2020 tb_unlock();
2023 /* Set both VGA and migration bits for simplicity and to remove
2024 * the notdirty callback faster.
2026 cpu_physical_memory_set_dirty_range(ram_addr, size,
2027 DIRTY_CLIENTS_NOCODE);
2028 /* we remove the notdirty callback only if the code has been
2029 flushed */
2030 if (!cpu_physical_memory_is_clean(ram_addr)) {
2031 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2035 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2036 unsigned size, bool is_write)
2038 return is_write;
2041 static const MemoryRegionOps notdirty_mem_ops = {
2042 .write = notdirty_mem_write,
2043 .valid.accepts = notdirty_mem_accepts,
2044 .endianness = DEVICE_NATIVE_ENDIAN,
2047 /* Generate a debug exception if a watchpoint has been hit. */
2048 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2050 CPUState *cpu = current_cpu;
2051 CPUClass *cc = CPU_GET_CLASS(cpu);
2052 CPUArchState *env = cpu->env_ptr;
2053 target_ulong pc, cs_base;
2054 target_ulong vaddr;
2055 CPUWatchpoint *wp;
2056 uint32_t cpu_flags;
2058 if (cpu->watchpoint_hit) {
2059 /* We re-entered the check after replacing the TB. Now raise
2060 * the debug interrupt so that it will trigger after the
2061 * current instruction. */
2062 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2063 return;
2065 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2066 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2067 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2068 && (wp->flags & flags)) {
2069 if (flags == BP_MEM_READ) {
2070 wp->flags |= BP_WATCHPOINT_HIT_READ;
2071 } else {
2072 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2074 wp->hitaddr = vaddr;
2075 wp->hitattrs = attrs;
2076 if (!cpu->watchpoint_hit) {
2077 if (wp->flags & BP_CPU &&
2078 !cc->debug_check_watchpoint(cpu, wp)) {
2079 wp->flags &= ~BP_WATCHPOINT_HIT;
2080 continue;
2082 cpu->watchpoint_hit = wp;
2084 /* The tb_lock will be reset when cpu_loop_exit or
2085 * cpu_loop_exit_noexc longjmp back into the cpu_exec
2086 * main loop.
2088 tb_lock();
2089 tb_check_watchpoint(cpu);
2090 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2091 cpu->exception_index = EXCP_DEBUG;
2092 cpu_loop_exit(cpu);
2093 } else {
2094 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2095 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2096 cpu_loop_exit_noexc(cpu);
2099 } else {
2100 wp->flags &= ~BP_WATCHPOINT_HIT;
2105 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2106 so these check for a hit then pass through to the normal out-of-line
2107 phys routines. */
2108 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2109 unsigned size, MemTxAttrs attrs)
2111 MemTxResult res;
2112 uint64_t data;
2113 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2114 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2116 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2117 switch (size) {
2118 case 1:
2119 data = address_space_ldub(as, addr, attrs, &res);
2120 break;
2121 case 2:
2122 data = address_space_lduw(as, addr, attrs, &res);
2123 break;
2124 case 4:
2125 data = address_space_ldl(as, addr, attrs, &res);
2126 break;
2127 default: abort();
2129 *pdata = data;
2130 return res;
2133 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2134 uint64_t val, unsigned size,
2135 MemTxAttrs attrs)
2137 MemTxResult res;
2138 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2139 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2141 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2142 switch (size) {
2143 case 1:
2144 address_space_stb(as, addr, val, attrs, &res);
2145 break;
2146 case 2:
2147 address_space_stw(as, addr, val, attrs, &res);
2148 break;
2149 case 4:
2150 address_space_stl(as, addr, val, attrs, &res);
2151 break;
2152 default: abort();
2154 return res;
2157 static const MemoryRegionOps watch_mem_ops = {
2158 .read_with_attrs = watch_mem_read,
2159 .write_with_attrs = watch_mem_write,
2160 .endianness = DEVICE_NATIVE_ENDIAN,
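/*
 * Descriptive note (editor's addition, not in the original source): the
 * subpage_* machinery below handles target pages whose contents are split
 * across several MemoryRegionSections.  Each subpage_t covers one page and
 * records, per byte offset, the section id that byte belongs to; accesses
 * are forwarded to the owning address space at subpage->base + addr.
 */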
2163 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2164 unsigned len, MemTxAttrs attrs)
2166 subpage_t *subpage = opaque;
2167 uint8_t buf[8];
2168 MemTxResult res;
2170 #if defined(DEBUG_SUBPAGE)
2171 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2172 subpage, len, addr);
2173 #endif
2174 res = address_space_read(subpage->as, addr + subpage->base,
2175 attrs, buf, len);
2176 if (res) {
2177 return res;
2179 switch (len) {
2180 case 1:
2181 *data = ldub_p(buf);
2182 return MEMTX_OK;
2183 case 2:
2184 *data = lduw_p(buf);
2185 return MEMTX_OK;
2186 case 4:
2187 *data = ldl_p(buf);
2188 return MEMTX_OK;
2189 case 8:
2190 *data = ldq_p(buf);
2191 return MEMTX_OK;
2192 default:
2193 abort();
2197 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2198 uint64_t value, unsigned len, MemTxAttrs attrs)
2200 subpage_t *subpage = opaque;
2201 uint8_t buf[8];
2203 #if defined(DEBUG_SUBPAGE)
2204 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2205 " value %"PRIx64"\n",
2206 __func__, subpage, len, addr, value);
2207 #endif
2208 switch (len) {
2209 case 1:
2210 stb_p(buf, value);
2211 break;
2212 case 2:
2213 stw_p(buf, value);
2214 break;
2215 case 4:
2216 stl_p(buf, value);
2217 break;
2218 case 8:
2219 stq_p(buf, value);
2220 break;
2221 default:
2222 abort();
2224 return address_space_write(subpage->as, addr + subpage->base,
2225 attrs, buf, len);
2228 static bool subpage_accepts(void *opaque, hwaddr addr,
2229 unsigned len, bool is_write)
2231 subpage_t *subpage = opaque;
2232 #if defined(DEBUG_SUBPAGE)
2233 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2234 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2235 #endif
2237 return address_space_access_valid(subpage->as, addr + subpage->base,
2238 len, is_write);
2241 static const MemoryRegionOps subpage_ops = {
2242 .read_with_attrs = subpage_read,
2243 .write_with_attrs = subpage_write,
2244 .impl.min_access_size = 1,
2245 .impl.max_access_size = 8,
2246 .valid.min_access_size = 1,
2247 .valid.max_access_size = 8,
2248 .valid.accepts = subpage_accepts,
2249 .endianness = DEVICE_NATIVE_ENDIAN,
2252 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2253 uint16_t section)
2255 int idx, eidx;
2257 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2258 return -1;
2259 idx = SUBPAGE_IDX(start);
2260 eidx = SUBPAGE_IDX(end);
2261 #if defined(DEBUG_SUBPAGE)
2262 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2263 __func__, mmio, start, end, idx, eidx, section);
2264 #endif
2265 for (; idx <= eidx; idx++) {
2266 mmio->sub_section[idx] = section;
2269 return 0;
2272 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2274 subpage_t *mmio;
2276 mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2277 mmio->as = as;
2278 mmio->base = base;
2279 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2280 NULL, TARGET_PAGE_SIZE);
2281 mmio->iomem.subpage = true;
2282 #if defined(DEBUG_SUBPAGE)
2283 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2284 mmio, base, TARGET_PAGE_SIZE);
2285 #endif
2286 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2288 return mmio;
2291 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2292 MemoryRegion *mr)
2294 assert(as);
2295 MemoryRegionSection section = {
2296 .address_space = as,
2297 .mr = mr,
2298 .offset_within_address_space = 0,
2299 .offset_within_region = 0,
2300 .size = int128_2_64(),
2303 return phys_section_add(map, &section);
2306 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2308 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2309 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2310 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2311 MemoryRegionSection *sections = d->map.sections;
2313 return sections[index & ~TARGET_PAGE_MASK].mr;
2316 static void io_mem_init(void)
2318 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2319 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2320 NULL, UINT64_MAX);
2321 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2322 NULL, UINT64_MAX);
2323 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2324 NULL, UINT64_MAX);
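/*
 * Descriptive note (editor's addition): mem_begin()/mem_commit() below
 * bracket a memory topology update.  mem_begin() allocates a fresh
 * AddressSpaceDispatch seeded with the four fixed sections asserted there;
 * mem_commit() compacts the new table, publishes it with atomic_rcu_set()
 * and frees the previous one after an RCU grace period via call_rcu().
 */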
2327 static void mem_begin(MemoryListener *listener)
2329 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2330 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2331 uint16_t n;
2333 n = dummy_section(&d->map, as, &io_mem_unassigned);
2334 assert(n == PHYS_SECTION_UNASSIGNED);
2335 n = dummy_section(&d->map, as, &io_mem_notdirty);
2336 assert(n == PHYS_SECTION_NOTDIRTY);
2337 n = dummy_section(&d->map, as, &io_mem_rom);
2338 assert(n == PHYS_SECTION_ROM);
2339 n = dummy_section(&d->map, as, &io_mem_watch);
2340 assert(n == PHYS_SECTION_WATCH);
2342 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2343 d->as = as;
2344 as->next_dispatch = d;
2347 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2349 phys_sections_free(&d->map);
2350 g_free(d);
2353 static void mem_commit(MemoryListener *listener)
2355 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2356 AddressSpaceDispatch *cur = as->dispatch;
2357 AddressSpaceDispatch *next = as->next_dispatch;
2359 phys_page_compact_all(next, next->map.nodes_nb);
2361 atomic_rcu_set(&as->dispatch, next);
2362 if (cur) {
2363 call_rcu(cur, address_space_dispatch_free, rcu);
2367 static void tcg_commit(MemoryListener *listener)
2369 CPUAddressSpace *cpuas;
2370 AddressSpaceDispatch *d;
2372 /* since each CPU stores ram addresses in its TLB cache, we must
2373 reset the modified entries */
2374 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2375 cpu_reloading_memory_map();
2376 /* The CPU and TLB are protected by the iothread lock.
2377 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2378 * may have split the RCU critical section.
2380 d = atomic_rcu_read(&cpuas->as->dispatch);
2381 cpuas->memory_dispatch = d;
2382 tlb_flush(cpuas->cpu, 1);
2385 void address_space_init_dispatch(AddressSpace *as)
2387 as->dispatch = NULL;
2388 as->dispatch_listener = (MemoryListener) {
2389 .begin = mem_begin,
2390 .commit = mem_commit,
2391 .region_add = mem_add,
2392 .region_nop = mem_add,
2393 .priority = 0,
2395 memory_listener_register(&as->dispatch_listener, as);
2398 void address_space_unregister(AddressSpace *as)
2400 memory_listener_unregister(&as->dispatch_listener);
2403 void address_space_destroy_dispatch(AddressSpace *as)
2405 AddressSpaceDispatch *d = as->dispatch;
2407 atomic_rcu_set(&as->dispatch, NULL);
2408 if (d) {
2409 call_rcu(d, address_space_dispatch_free, rcu);
2413 static void memory_map_init(void)
2415 system_memory = g_malloc(sizeof(*system_memory));
2417 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2418 address_space_init(&address_space_memory, system_memory, "memory");
2420 system_io = g_malloc(sizeof(*system_io));
2421 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2422 65536);
2423 address_space_init(&address_space_io, system_io, "I/O");
2426 MemoryRegion *get_system_memory(void)
2428 return system_memory;
2431 MemoryRegion *get_system_io(void)
2433 return system_io;
2436 #endif /* !defined(CONFIG_USER_ONLY) */
2438 /* physical memory access (slow version, mainly for debug) */
2439 #if defined(CONFIG_USER_ONLY)
2440 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2441 uint8_t *buf, int len, int is_write)
2443 int l, flags;
2444 target_ulong page;
2445 void * p;
2447 while (len > 0) {
2448 page = addr & TARGET_PAGE_MASK;
2449 l = (page + TARGET_PAGE_SIZE) - addr;
2450 if (l > len)
2451 l = len;
2452 flags = page_get_flags(page);
2453 if (!(flags & PAGE_VALID))
2454 return -1;
2455 if (is_write) {
2456 if (!(flags & PAGE_WRITE))
2457 return -1;
2458 /* XXX: this code should not depend on lock_user */
2459 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2460 return -1;
2461 memcpy(p, buf, l);
2462 unlock_user(p, addr, l);
2463 } else {
2464 if (!(flags & PAGE_READ))
2465 return -1;
2466 /* XXX: this code should not depend on lock_user */
2467 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2468 return -1;
2469 memcpy(buf, p, l);
2470 unlock_user(p, addr, 0);
2472 len -= l;
2473 buf += l;
2474 addr += l;
2476 return 0;
2479 #else
2481 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2482 hwaddr length)
2484 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2485 addr += memory_region_get_ram_addr(mr);
2487 /* No early return if dirty_log_mask is or becomes 0, because
2488 * cpu_physical_memory_set_dirty_range will still call
2489 * xen_modified_memory.
2491 if (dirty_log_mask) {
2492 dirty_log_mask =
2493 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2495 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2496 tb_lock();
2497 tb_invalidate_phys_range(addr, addr + length);
2498 tb_unlock();
2499 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2501 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2504 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2506 unsigned access_size_max = mr->ops->valid.max_access_size;
2508 /* Regions are assumed to support 1-4 byte accesses unless
2509 otherwise specified. */
2510 if (access_size_max == 0) {
2511 access_size_max = 4;
2514 /* Bound the maximum access by the alignment of the address. */
2515 if (!mr->ops->impl.unaligned) {
2516 unsigned align_size_max = addr & -addr;
2517 if (align_size_max != 0 && align_size_max < access_size_max) {
2518 access_size_max = align_size_max;
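    /*
     * Worked example (editor's addition): for addr == 0x1006 on a region
     * without impl.unaligned, addr & -addr == 2, so an 8-byte request is
     * clamped to a 2-byte access and the caller loops for the remainder.
     */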
2522 /* Don't attempt accesses larger than the maximum. */
2523 if (l > access_size_max) {
2524 l = access_size_max;
2526 l = pow2floor(l);
2528 return l;
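/*
 * Descriptive comment (editor's addition): take the iothread ("big QEMU")
 * lock if the region requires global locking and we do not already hold it,
 * and flush any coalesced MMIO before dispatching.  Returns true when the
 * caller must drop the lock acquired here after the access completes.
 */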
2531 static bool prepare_mmio_access(MemoryRegion *mr)
2533 bool unlocked = !qemu_mutex_iothread_locked();
2534 bool release_lock = false;
2536 if (unlocked && mr->global_locking) {
2537 qemu_mutex_lock_iothread();
2538 unlocked = false;
2539 release_lock = true;
2541 if (mr->flush_coalesced_mmio) {
2542 if (unlocked) {
2543 qemu_mutex_lock_iothread();
2545 qemu_flush_coalesced_mmio_buffer();
2546 if (unlocked) {
2547 qemu_mutex_unlock_iothread();
2551 return release_lock;
2554 /* Called within RCU critical section. */
2555 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2556 MemTxAttrs attrs,
2557 const uint8_t *buf,
2558 int len, hwaddr addr1,
2559 hwaddr l, MemoryRegion *mr)
2561 uint8_t *ptr;
2562 uint64_t val;
2563 MemTxResult result = MEMTX_OK;
2564 bool release_lock = false;
2566 for (;;) {
2567 if (!memory_access_is_direct(mr, true)) {
2568 release_lock |= prepare_mmio_access(mr);
2569 l = memory_access_size(mr, l, addr1);
2570 /* XXX: could force current_cpu to NULL to avoid
2571 potential bugs */
2572 switch (l) {
2573 case 8:
2574 /* 64 bit write access */
2575 val = ldq_p(buf);
2576 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2577 attrs);
2578 break;
2579 case 4:
2580 /* 32 bit write access */
2581 val = ldl_p(buf);
2582 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2583 attrs);
2584 break;
2585 case 2:
2586 /* 16 bit write access */
2587 val = lduw_p(buf);
2588 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2589 attrs);
2590 break;
2591 case 1:
2592 /* 8 bit write access */
2593 val = ldub_p(buf);
2594 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2595 attrs);
2596 break;
2597 default:
2598 abort();
2600 } else {
2601 /* RAM case */
2602 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2603 memcpy(ptr, buf, l);
2604 invalidate_and_set_dirty(mr, addr1, l);
2607 if (release_lock) {
2608 qemu_mutex_unlock_iothread();
2609 release_lock = false;
2612 len -= l;
2613 buf += l;
2614 addr += l;
2616 if (!len) {
2617 break;
2620 l = len;
2621 mr = address_space_translate(as, addr, &addr1, &l, true);
2624 return result;
2627 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2628 const uint8_t *buf, int len)
2630 hwaddr l;
2631 hwaddr addr1;
2632 MemoryRegion *mr;
2633 MemTxResult result = MEMTX_OK;
2635 if (len > 0) {
2636 rcu_read_lock();
2637 l = len;
2638 mr = address_space_translate(as, addr, &addr1, &l, true);
2639 result = address_space_write_continue(as, addr, attrs, buf, len,
2640 addr1, l, mr);
2641 rcu_read_unlock();
2644 return result;
2647 /* Called within RCU critical section. */
2648 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2649 MemTxAttrs attrs, uint8_t *buf,
2650 int len, hwaddr addr1, hwaddr l,
2651 MemoryRegion *mr)
2653 uint8_t *ptr;
2654 uint64_t val;
2655 MemTxResult result = MEMTX_OK;
2656 bool release_lock = false;
2658 for (;;) {
2659 if (!memory_access_is_direct(mr, false)) {
2660 /* I/O case */
2661 release_lock |= prepare_mmio_access(mr);
2662 l = memory_access_size(mr, l, addr1);
2663 switch (l) {
2664 case 8:
2665 /* 64 bit read access */
2666 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2667 attrs);
2668 stq_p(buf, val);
2669 break;
2670 case 4:
2671 /* 32 bit read access */
2672 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2673 attrs);
2674 stl_p(buf, val);
2675 break;
2676 case 2:
2677 /* 16 bit read access */
2678 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2679 attrs);
2680 stw_p(buf, val);
2681 break;
2682 case 1:
2683 /* 8 bit read access */
2684 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2685 attrs);
2686 stb_p(buf, val);
2687 break;
2688 default:
2689 abort();
2691 } else {
2692 /* RAM case */
2693 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2694 memcpy(buf, ptr, l);
2697 if (release_lock) {
2698 qemu_mutex_unlock_iothread();
2699 release_lock = false;
2702 len -= l;
2703 buf += l;
2704 addr += l;
2706 if (!len) {
2707 break;
2710 l = len;
2711 mr = address_space_translate(as, addr, &addr1, &l, false);
2714 return result;
2717 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2718 MemTxAttrs attrs, uint8_t *buf, int len)
2720 hwaddr l;
2721 hwaddr addr1;
2722 MemoryRegion *mr;
2723 MemTxResult result = MEMTX_OK;
2725 if (len > 0) {
2726 rcu_read_lock();
2727 l = len;
2728 mr = address_space_translate(as, addr, &addr1, &l, false);
2729 result = address_space_read_continue(as, addr, attrs, buf, len,
2730 addr1, l, mr);
2731 rcu_read_unlock();
2734 return result;
2737 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2738 uint8_t *buf, int len, bool is_write)
2740 if (is_write) {
2741 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2742 } else {
2743 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2747 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2748 int len, int is_write)
2750 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2751 buf, len, is_write);
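/*
 * Illustrative sketch (editor's addition, compiled out): a guest-physical
 * write through the convenience wrapper above.  The helper name and the
 * buffer contents are made up for the example.
 */
#if 0
static void example_phys_write(hwaddr addr)
{
    uint8_t buf[4] = { 0xde, 0xad, 0xbe, 0xef };

    /* a non-zero is_write argument selects the write path of address_space_rw() */
    cpu_physical_memory_rw(addr, buf, sizeof(buf), 1);
}
#endif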
2754 enum write_rom_type {
2755 WRITE_DATA,
2756 FLUSH_CACHE,
2759 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2760 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2762 hwaddr l;
2763 uint8_t *ptr;
2764 hwaddr addr1;
2765 MemoryRegion *mr;
2767 rcu_read_lock();
2768 while (len > 0) {
2769 l = len;
2770 mr = address_space_translate(as, addr, &addr1, &l, true);
2772 if (!(memory_region_is_ram(mr) ||
2773 memory_region_is_romd(mr))) {
2774 l = memory_access_size(mr, l, addr1);
2775 } else {
2776 /* ROM/RAM case */
2777 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2778 switch (type) {
2779 case WRITE_DATA:
2780 memcpy(ptr, buf, l);
2781 invalidate_and_set_dirty(mr, addr1, l);
2782 break;
2783 case FLUSH_CACHE:
2784 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2785 break;
2788 len -= l;
2789 buf += l;
2790 addr += l;
2792 rcu_read_unlock();
2795 /* used for ROM loading: can write to RAM and ROM */
2796 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2797 const uint8_t *buf, int len)
2799 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2802 void cpu_flush_icache_range(hwaddr start, int len)
2805 * This function should do the same thing as an icache flush that was
2806 * triggered from within the guest. For TCG we are always cache coherent,
2807 * so there is no need to flush anything. For KVM / Xen we need to flush
2808 * the host's instruction cache at least.
2810 if (tcg_enabled()) {
2811 return;
2814 cpu_physical_memory_write_rom_internal(&address_space_memory,
2815 start, NULL, len, FLUSH_CACHE);
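/*
 * Descriptive note (editor's addition): when address_space_map() cannot
 * access a region directly it falls back to the single BounceBuffer below.
 * While bounce.in_use is set, further bounce mappings fail; callers can
 * queue a QEMUBH with cpu_register_map_client() and are notified once
 * address_space_unmap() releases the buffer.
 */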
2818 typedef struct {
2819 MemoryRegion *mr;
2820 void *buffer;
2821 hwaddr addr;
2822 hwaddr len;
2823 bool in_use;
2824 } BounceBuffer;
2826 static BounceBuffer bounce;
2828 typedef struct MapClient {
2829 QEMUBH *bh;
2830 QLIST_ENTRY(MapClient) link;
2831 } MapClient;
2833 QemuMutex map_client_list_lock;
2834 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2835 = QLIST_HEAD_INITIALIZER(map_client_list);
2837 static void cpu_unregister_map_client_do(MapClient *client)
2839 QLIST_REMOVE(client, link);
2840 g_free(client);
2843 static void cpu_notify_map_clients_locked(void)
2845 MapClient *client;
2847 while (!QLIST_EMPTY(&map_client_list)) {
2848 client = QLIST_FIRST(&map_client_list);
2849 qemu_bh_schedule(client->bh);
2850 cpu_unregister_map_client_do(client);
2854 void cpu_register_map_client(QEMUBH *bh)
2856 MapClient *client = g_malloc(sizeof(*client));
2858 qemu_mutex_lock(&map_client_list_lock);
2859 client->bh = bh;
2860 QLIST_INSERT_HEAD(&map_client_list, client, link);
2861 if (!atomic_read(&bounce.in_use)) {
2862 cpu_notify_map_clients_locked();
2864 qemu_mutex_unlock(&map_client_list_lock);
2867 void cpu_exec_init_all(void)
2869 qemu_mutex_init(&ram_list.mutex);
2870 /* The data structures we set up here depend on knowing the page size,
2871 * so no more changes can be made after this point.
2872 * In an ideal world, nothing we did before we had finished the
2873 * machine setup would care about the target page size, and we could
2874 * do this much later, rather than requiring board models to state
2875 * up front what their requirements are.
2877 finalize_target_page_bits();
2878 io_mem_init();
2879 memory_map_init();
2880 qemu_mutex_init(&map_client_list_lock);
2883 void cpu_unregister_map_client(QEMUBH *bh)
2885 MapClient *client;
2887 qemu_mutex_lock(&map_client_list_lock);
2888 QLIST_FOREACH(client, &map_client_list, link) {
2889 if (client->bh == bh) {
2890 cpu_unregister_map_client_do(client);
2891 break;
2894 qemu_mutex_unlock(&map_client_list_lock);
2897 static void cpu_notify_map_clients(void)
2899 qemu_mutex_lock(&map_client_list_lock);
2900 cpu_notify_map_clients_locked();
2901 qemu_mutex_unlock(&map_client_list_lock);
2904 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2906 MemoryRegion *mr;
2907 hwaddr l, xlat;
2909 rcu_read_lock();
2910 while (len > 0) {
2911 l = len;
2912 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2913 if (!memory_access_is_direct(mr, is_write)) {
2914 l = memory_access_size(mr, l, addr);
2915 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2916 return false;
2920 len -= l;
2921 addr += l;
2923 rcu_read_unlock();
2924 return true;
2927 /* Map a physical memory region into a host virtual address.
2928 * May map a subset of the requested range, given by and returned in *plen.
2929 * May return NULL if resources needed to perform the mapping are exhausted.
2930 * Use only for reads OR writes - not for read-modify-write operations.
2931 * Use cpu_register_map_client() to know when retrying the map operation is
2932 * likely to succeed.
2934 void *address_space_map(AddressSpace *as,
2935 hwaddr addr,
2936 hwaddr *plen,
2937 bool is_write)
2939 hwaddr len = *plen;
2940 hwaddr done = 0;
2941 hwaddr l, xlat, base;
2942 MemoryRegion *mr, *this_mr;
2943 void *ptr;
2945 if (len == 0) {
2946 return NULL;
2949 l = len;
2950 rcu_read_lock();
2951 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2953 if (!memory_access_is_direct(mr, is_write)) {
2954 if (atomic_xchg(&bounce.in_use, true)) {
2955 rcu_read_unlock();
2956 return NULL;
2958 /* Avoid unbounded allocations */
2959 l = MIN(l, TARGET_PAGE_SIZE);
2960 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2961 bounce.addr = addr;
2962 bounce.len = l;
2964 memory_region_ref(mr);
2965 bounce.mr = mr;
2966 if (!is_write) {
2967 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2968 bounce.buffer, l);
2971 rcu_read_unlock();
2972 *plen = l;
2973 return bounce.buffer;
2976 base = xlat;
2978 for (;;) {
2979 len -= l;
2980 addr += l;
2981 done += l;
2982 if (len == 0) {
2983 break;
2986 l = len;
2987 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2988 if (this_mr != mr || xlat != base + done) {
2989 break;
2993 memory_region_ref(mr);
2994 *plen = done;
2995 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2996 rcu_read_unlock();
2998 return ptr;
3001 /* Unmaps a memory region previously mapped by address_space_map().
3002 * Will also mark the memory as dirty if is_write == 1. access_len gives
3003 * the amount of memory that was actually read or written by the caller.
3005 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3006 int is_write, hwaddr access_len)
3008 if (buffer != bounce.buffer) {
3009 MemoryRegion *mr;
3010 ram_addr_t addr1;
3012 mr = memory_region_from_host(buffer, &addr1);
3013 assert(mr != NULL);
3014 if (is_write) {
3015 invalidate_and_set_dirty(mr, addr1, access_len);
3017 if (xen_enabled()) {
3018 xen_invalidate_map_cache_entry(buffer);
3020 memory_region_unref(mr);
3021 return;
3023 if (is_write) {
3024 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3025 bounce.buffer, access_len);
3027 qemu_vfree(bounce.buffer);
3028 bounce.buffer = NULL;
3029 memory_region_unref(bounce.mr);
3030 atomic_mb_set(&bounce.in_use, false);
3031 cpu_notify_map_clients();
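/*
 * Illustrative sketch (editor's addition, compiled out): the calling
 * pattern for address_space_map()/address_space_unmap() described above.
 * The helper name is made up for the example.
 */
#if 0
static bool example_dma_read(AddressSpace *as, hwaddr addr,
                             uint8_t *dest, hwaddr len)
{
    hwaddr plen = len;
    void *p = address_space_map(as, addr, &plen, false);

    if (!p) {
        /* Mapping resources exhausted (e.g. bounce buffer busy); a real
         * caller would register a map client and retry later. */
        return false;
    }
    /* Only plen bytes were mapped; a real caller would loop for the rest. */
    memcpy(dest, p, plen);
    address_space_unmap(as, p, plen, false, plen);
    return true;
}
#endif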
3034 void *cpu_physical_memory_map(hwaddr addr,
3035 hwaddr *plen,
3036 int is_write)
3038 return address_space_map(&address_space_memory, addr, plen, is_write);
3041 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3042 int is_write, hwaddr access_len)
3044 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3047 /* warning: addr must be aligned */
3048 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3049 MemTxAttrs attrs,
3050 MemTxResult *result,
3051 enum device_endian endian)
3053 uint8_t *ptr;
3054 uint64_t val;
3055 MemoryRegion *mr;
3056 hwaddr l = 4;
3057 hwaddr addr1;
3058 MemTxResult r;
3059 bool release_lock = false;
3061 rcu_read_lock();
3062 mr = address_space_translate(as, addr, &addr1, &l, false);
3063 if (l < 4 || !memory_access_is_direct(mr, false)) {
3064 release_lock |= prepare_mmio_access(mr);
3066 /* I/O case */
3067 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3068 #if defined(TARGET_WORDS_BIGENDIAN)
3069 if (endian == DEVICE_LITTLE_ENDIAN) {
3070 val = bswap32(val);
3072 #else
3073 if (endian == DEVICE_BIG_ENDIAN) {
3074 val = bswap32(val);
3076 #endif
3077 } else {
3078 /* RAM case */
3079 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3080 switch (endian) {
3081 case DEVICE_LITTLE_ENDIAN:
3082 val = ldl_le_p(ptr);
3083 break;
3084 case DEVICE_BIG_ENDIAN:
3085 val = ldl_be_p(ptr);
3086 break;
3087 default:
3088 val = ldl_p(ptr);
3089 break;
3091 r = MEMTX_OK;
3093 if (result) {
3094 *result = r;
3096 if (release_lock) {
3097 qemu_mutex_unlock_iothread();
3099 rcu_read_unlock();
3100 return val;
3103 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3104 MemTxAttrs attrs, MemTxResult *result)
3106 return address_space_ldl_internal(as, addr, attrs, result,
3107 DEVICE_NATIVE_ENDIAN);
3110 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3111 MemTxAttrs attrs, MemTxResult *result)
3113 return address_space_ldl_internal(as, addr, attrs, result,
3114 DEVICE_LITTLE_ENDIAN);
3117 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3118 MemTxAttrs attrs, MemTxResult *result)
3120 return address_space_ldl_internal(as, addr, attrs, result,
3121 DEVICE_BIG_ENDIAN);
3124 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3126 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3129 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3131 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3134 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3136 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
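/*
 * Illustrative sketch (editor's addition, compiled out): using the
 * attrs/result form above when the caller cares about bus errors, rather
 * than ldl_phys(), which discards the MemTxResult.  The helper name is
 * made up for the example.
 */
#if 0
static uint32_t example_checked_ldl(AddressSpace *as, hwaddr addr)
{
    MemTxResult res;
    uint32_t val = address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, &res);

    if (res != MEMTX_OK) {
        val = 0; /* a real caller would report or propagate the error */
    }
    return val;
}
#endif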
3139 /* warning: addr must be aligned */
3140 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3141 MemTxAttrs attrs,
3142 MemTxResult *result,
3143 enum device_endian endian)
3145 uint8_t *ptr;
3146 uint64_t val;
3147 MemoryRegion *mr;
3148 hwaddr l = 8;
3149 hwaddr addr1;
3150 MemTxResult r;
3151 bool release_lock = false;
3153 rcu_read_lock();
3154 mr = address_space_translate(as, addr, &addr1, &l,
3155 false);
3156 if (l < 8 || !memory_access_is_direct(mr, false)) {
3157 release_lock |= prepare_mmio_access(mr);
3159 /* I/O case */
3160 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3161 #if defined(TARGET_WORDS_BIGENDIAN)
3162 if (endian == DEVICE_LITTLE_ENDIAN) {
3163 val = bswap64(val);
3165 #else
3166 if (endian == DEVICE_BIG_ENDIAN) {
3167 val = bswap64(val);
3169 #endif
3170 } else {
3171 /* RAM case */
3172 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3173 switch (endian) {
3174 case DEVICE_LITTLE_ENDIAN:
3175 val = ldq_le_p(ptr);
3176 break;
3177 case DEVICE_BIG_ENDIAN:
3178 val = ldq_be_p(ptr);
3179 break;
3180 default:
3181 val = ldq_p(ptr);
3182 break;
3184 r = MEMTX_OK;
3186 if (result) {
3187 *result = r;
3189 if (release_lock) {
3190 qemu_mutex_unlock_iothread();
3192 rcu_read_unlock();
3193 return val;
3196 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3197 MemTxAttrs attrs, MemTxResult *result)
3199 return address_space_ldq_internal(as, addr, attrs, result,
3200 DEVICE_NATIVE_ENDIAN);
3203 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3204 MemTxAttrs attrs, MemTxResult *result)
3206 return address_space_ldq_internal(as, addr, attrs, result,
3207 DEVICE_LITTLE_ENDIAN);
3210 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3211 MemTxAttrs attrs, MemTxResult *result)
3213 return address_space_ldq_internal(as, addr, attrs, result,
3214 DEVICE_BIG_ENDIAN);
3217 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3219 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3222 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3224 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3227 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3229 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3232 /* XXX: optimize */
3233 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3234 MemTxAttrs attrs, MemTxResult *result)
3236 uint8_t val;
3237 MemTxResult r;
3239 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3240 if (result) {
3241 *result = r;
3243 return val;
3246 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3248 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3251 /* warning: addr must be aligned */
3252 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3253 hwaddr addr,
3254 MemTxAttrs attrs,
3255 MemTxResult *result,
3256 enum device_endian endian)
3258 uint8_t *ptr;
3259 uint64_t val;
3260 MemoryRegion *mr;
3261 hwaddr l = 2;
3262 hwaddr addr1;
3263 MemTxResult r;
3264 bool release_lock = false;
3266 rcu_read_lock();
3267 mr = address_space_translate(as, addr, &addr1, &l,
3268 false);
3269 if (l < 2 || !memory_access_is_direct(mr, false)) {
3270 release_lock |= prepare_mmio_access(mr);
3272 /* I/O case */
3273 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3274 #if defined(TARGET_WORDS_BIGENDIAN)
3275 if (endian == DEVICE_LITTLE_ENDIAN) {
3276 val = bswap16(val);
3278 #else
3279 if (endian == DEVICE_BIG_ENDIAN) {
3280 val = bswap16(val);
3282 #endif
3283 } else {
3284 /* RAM case */
3285 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3286 switch (endian) {
3287 case DEVICE_LITTLE_ENDIAN:
3288 val = lduw_le_p(ptr);
3289 break;
3290 case DEVICE_BIG_ENDIAN:
3291 val = lduw_be_p(ptr);
3292 break;
3293 default:
3294 val = lduw_p(ptr);
3295 break;
3297 r = MEMTX_OK;
3299 if (result) {
3300 *result = r;
3302 if (release_lock) {
3303 qemu_mutex_unlock_iothread();
3305 rcu_read_unlock();
3306 return val;
3309 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3310 MemTxAttrs attrs, MemTxResult *result)
3312 return address_space_lduw_internal(as, addr, attrs, result,
3313 DEVICE_NATIVE_ENDIAN);
3316 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3317 MemTxAttrs attrs, MemTxResult *result)
3319 return address_space_lduw_internal(as, addr, attrs, result,
3320 DEVICE_LITTLE_ENDIAN);
3323 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3324 MemTxAttrs attrs, MemTxResult *result)
3326 return address_space_lduw_internal(as, addr, attrs, result,
3327 DEVICE_BIG_ENDIAN);
3330 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3332 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3335 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3337 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3340 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3342 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3345 /* warning: addr must be aligned. The ram page is not marked as dirty
3346 and the code inside is not invalidated. It is useful if the dirty
3347 bits are used to track modified PTEs */
3348 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3349 MemTxAttrs attrs, MemTxResult *result)
3351 uint8_t *ptr;
3352 MemoryRegion *mr;
3353 hwaddr l = 4;
3354 hwaddr addr1;
3355 MemTxResult r;
3356 uint8_t dirty_log_mask;
3357 bool release_lock = false;
3359 rcu_read_lock();
3360 mr = address_space_translate(as, addr, &addr1, &l,
3361 true);
3362 if (l < 4 || !memory_access_is_direct(mr, true)) {
3363 release_lock |= prepare_mmio_access(mr);
3365 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3366 } else {
3367 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3368 stl_p(ptr, val);
3370 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3371 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3372 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3373 4, dirty_log_mask);
3374 r = MEMTX_OK;
3376 if (result) {
3377 *result = r;
3379 if (release_lock) {
3380 qemu_mutex_unlock_iothread();
3382 rcu_read_unlock();
3385 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3387 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3390 /* warning: addr must be aligned */
3391 static inline void address_space_stl_internal(AddressSpace *as,
3392 hwaddr addr, uint32_t val,
3393 MemTxAttrs attrs,
3394 MemTxResult *result,
3395 enum device_endian endian)
3397 uint8_t *ptr;
3398 MemoryRegion *mr;
3399 hwaddr l = 4;
3400 hwaddr addr1;
3401 MemTxResult r;
3402 bool release_lock = false;
3404 rcu_read_lock();
3405 mr = address_space_translate(as, addr, &addr1, &l,
3406 true);
3407 if (l < 4 || !memory_access_is_direct(mr, true)) {
3408 release_lock |= prepare_mmio_access(mr);
3410 #if defined(TARGET_WORDS_BIGENDIAN)
3411 if (endian == DEVICE_LITTLE_ENDIAN) {
3412 val = bswap32(val);
3414 #else
3415 if (endian == DEVICE_BIG_ENDIAN) {
3416 val = bswap32(val);
3418 #endif
3419 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3420 } else {
3421 /* RAM case */
3422 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3423 switch (endian) {
3424 case DEVICE_LITTLE_ENDIAN:
3425 stl_le_p(ptr, val);
3426 break;
3427 case DEVICE_BIG_ENDIAN:
3428 stl_be_p(ptr, val);
3429 break;
3430 default:
3431 stl_p(ptr, val);
3432 break;
3434 invalidate_and_set_dirty(mr, addr1, 4);
3435 r = MEMTX_OK;
3437 if (result) {
3438 *result = r;
3440 if (release_lock) {
3441 qemu_mutex_unlock_iothread();
3443 rcu_read_unlock();
3446 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3447 MemTxAttrs attrs, MemTxResult *result)
3449 address_space_stl_internal(as, addr, val, attrs, result,
3450 DEVICE_NATIVE_ENDIAN);
3453 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3454 MemTxAttrs attrs, MemTxResult *result)
3456 address_space_stl_internal(as, addr, val, attrs, result,
3457 DEVICE_LITTLE_ENDIAN);
3460 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3461 MemTxAttrs attrs, MemTxResult *result)
3463 address_space_stl_internal(as, addr, val, attrs, result,
3464 DEVICE_BIG_ENDIAN);
3467 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3469 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3472 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3474 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3477 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3479 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3482 /* XXX: optimize */
3483 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3484 MemTxAttrs attrs, MemTxResult *result)
3486 uint8_t v = val;
3487 MemTxResult r;
3489 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3490 if (result) {
3491 *result = r;
3495 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3497 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3500 /* warning: addr must be aligned */
3501 static inline void address_space_stw_internal(AddressSpace *as,
3502 hwaddr addr, uint32_t val,
3503 MemTxAttrs attrs,
3504 MemTxResult *result,
3505 enum device_endian endian)
3507 uint8_t *ptr;
3508 MemoryRegion *mr;
3509 hwaddr l = 2;
3510 hwaddr addr1;
3511 MemTxResult r;
3512 bool release_lock = false;
3514 rcu_read_lock();
3515 mr = address_space_translate(as, addr, &addr1, &l, true);
3516 if (l < 2 || !memory_access_is_direct(mr, true)) {
3517 release_lock |= prepare_mmio_access(mr);
3519 #if defined(TARGET_WORDS_BIGENDIAN)
3520 if (endian == DEVICE_LITTLE_ENDIAN) {
3521 val = bswap16(val);
3523 #else
3524 if (endian == DEVICE_BIG_ENDIAN) {
3525 val = bswap16(val);
3527 #endif
3528 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3529 } else {
3530 /* RAM case */
3531 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3532 switch (endian) {
3533 case DEVICE_LITTLE_ENDIAN:
3534 stw_le_p(ptr, val);
3535 break;
3536 case DEVICE_BIG_ENDIAN:
3537 stw_be_p(ptr, val);
3538 break;
3539 default:
3540 stw_p(ptr, val);
3541 break;
3543 invalidate_and_set_dirty(mr, addr1, 2);
3544 r = MEMTX_OK;
3546 if (result) {
3547 *result = r;
3549 if (release_lock) {
3550 qemu_mutex_unlock_iothread();
3552 rcu_read_unlock();
3555 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3556 MemTxAttrs attrs, MemTxResult *result)
3558 address_space_stw_internal(as, addr, val, attrs, result,
3559 DEVICE_NATIVE_ENDIAN);
3562 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3563 MemTxAttrs attrs, MemTxResult *result)
3565 address_space_stw_internal(as, addr, val, attrs, result,
3566 DEVICE_LITTLE_ENDIAN);
3569 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3570 MemTxAttrs attrs, MemTxResult *result)
3572 address_space_stw_internal(as, addr, val, attrs, result,
3573 DEVICE_BIG_ENDIAN);
3576 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3578 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3581 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3583 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3586 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3588 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3591 /* XXX: optimize */
3592 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3593 MemTxAttrs attrs, MemTxResult *result)
3595 MemTxResult r;
3596 val = tswap64(val);
3597 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3598 if (result) {
3599 *result = r;
3603 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3604 MemTxAttrs attrs, MemTxResult *result)
3606 MemTxResult r;
3607 val = cpu_to_le64(val);
3608 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3609 if (result) {
3610 *result = r;
3613 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3614 MemTxAttrs attrs, MemTxResult *result)
3616 MemTxResult r;
3617 val = cpu_to_be64(val);
3618 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3619 if (result) {
3620 *result = r;
3624 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3626 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3629 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3631 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3634 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3636 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3639 /* virtual memory access for debug (includes writing to ROM) */
3640 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3641 uint8_t *buf, int len, int is_write)
3643 int l;
3644 hwaddr phys_addr;
3645 target_ulong page;
3647 while (len > 0) {
3648 int asidx;
3649 MemTxAttrs attrs;
3651 page = addr & TARGET_PAGE_MASK;
3652 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3653 asidx = cpu_asidx_from_attrs(cpu, attrs);
3654 /* if no physical page mapped, return an error */
3655 if (phys_addr == -1)
3656 return -1;
3657 l = (page + TARGET_PAGE_SIZE) - addr;
3658 if (l > len)
3659 l = len;
3660 phys_addr += (addr & ~TARGET_PAGE_MASK);
3661 if (is_write) {
3662 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3663 phys_addr, buf, l);
3664 } else {
3665 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3666 MEMTXATTRS_UNSPECIFIED,
3667 buf, l, 0);
3669 len -= l;
3670 buf += l;
3671 addr += l;
3673 return 0;
3677  * Allows code that needs to deal with migration bitmaps etc. to still be built
3678  * target-independent.
3680 size_t qemu_target_page_bits(void)
3682 return TARGET_PAGE_BITS;
3685 #endif
3688 * A helper function for the _utterly broken_ virtio device model to find out if
3689 * it's running on a big endian machine. Don't do this at home kids!
3691 bool target_words_bigendian(void);
3692 bool target_words_bigendian(void)
3694 #if defined(TARGET_WORDS_BIGENDIAN)
3695 return true;
3696 #else
3697 return false;
3698 #endif
3701 #ifndef CONFIG_USER_ONLY
3702 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3704     MemoryRegion *mr;
3705 hwaddr l = 1;
3706 bool res;
3708 rcu_read_lock();
3709 mr = address_space_translate(&address_space_memory,
3710 phys_addr, &phys_addr, &l, false);
3712 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3713 rcu_read_unlock();
3714 return res;
3717 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3719 RAMBlock *block;
3720 int ret = 0;
3722 rcu_read_lock();
3723 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3724 ret = func(block->idstr, block->host, block->offset,
3725 block->used_length, opaque);
3726 if (ret) {
3727 break;
3730 rcu_read_unlock();
3731 return ret;
3733 #endif