/*
 * Virtual page mapping
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"
#ifndef _WIN32
#endif

#include "qemu/cutils.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg.h"
#include "hw/qdev-core.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#include "hw/xen/xen.h"
#endif
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#else /* !CONFIG_USER_ONLY */
#include "hw/hw.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "translate-all.h"
#include "sysemu/replay.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "exec/log.h"

#include "migration/vmstate.h"

#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

#endif

#ifdef TARGET_PAGE_BITS_VARY
int target_page_bits;
bool target_page_bits_decided;
#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

bool set_preferred_target_page_bits(int bits)
{
    /* The target page size is the lowest common denominator for all
     * the CPUs in the system, so we can only make it smaller, never
     * larger. And we can't make it smaller once we've committed to
     * a particular size.
     */
#ifdef TARGET_PAGE_BITS_VARY
    assert(bits >= TARGET_PAGE_BITS_MIN);
    if (target_page_bits == 0 || target_page_bits > bits) {
        if (target_page_bits_decided) {
            return false;
        }
        target_page_bits = bits;
    }
#endif
    return true;
}

#if !defined(CONFIG_USER_ONLY)
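
/* Freeze the target page size at its current (or minimum) value; after
 * this, set_preferred_target_page_bits() refuses any further change.
 */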
static void finalize_target_page_bits(void)
{
#ifdef TARGET_PAGE_BITS_VARY
    if (target_page_bits == 0) {
        target_page_bits = TARGET_PAGE_BITS_MIN;
    }
    target_page_bits_decided = true;
#endif
}

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)
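
/* Number of radix-tree levels needed to cover the whole address space:
 * e.g. with 12-bit (4KiB) target pages this is ((64 - 12 - 1) / 9) + 1 = 6
 * levels, each indexing a 512-entry node.
 */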
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    struct rcu_head rcu;

    MemoryRegionSection *mru_section;
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;

/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};

#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    static unsigned alloc_hint = 16;
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
        alloc_hint = map->nodes_nb_alloc;
    }
}
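
/* Allocate one node from the reserved pool and initialise all of its
 * P_L2_SIZE entries: leaves start out pointing at the unassigned section,
 * interior nodes at PHYS_MAP_NODE_NIL with skip = 1.
 */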
static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}
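
/* Recursively point the page range starting at *index (*nb pages remaining)
 * at section number 'leaf', allocating intermediate nodes on demand.  A
 * subtree that is fully covered is collapsed into a single leaf entry at
 * the current level.
 */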
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes);
    }
}

static inline bool section_covers_addr(const MemoryRegionSection *section,
                                       hwaddr addr)
{
    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
     * the section must cover the entire address space.
     */
    return int128_gethi(section->size) ||
           range_covers_byte(section->offset_within_address_space,
                             int128_getlo(section->size), addr);
}
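
/* Walk the radix tree for 'addr', consuming 'skip' levels at each hop;
 * return the unassigned section if the lookup runs off the tree or the
 * resulting section does not actually cover the address.
 */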
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (section_covers_addr(&sections[lp.ptr], addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section = atomic_read(&d->mru_section);
    subpage_t *subpage;
    bool update;

    if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
        section_covers_addr(section, addr)) {
        update = false;
    } else {
        section = phys_page_find(d->phys_map, addr, d->map.nodes,
                                 d->map.sections);
        update = true;
    }
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    if (update) {
        atomic_set(&d->mru_section, section);
    }
    return section;
}

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}

/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    *xlat = addr;
    return mr;
}

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);

    section = address_space_translate_internal(d, addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu, 1);

    return 0;
}

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

    return 0;
}

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}

static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

static bool cpu_common_crash_occurred_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return cpu->crash_occurred;
}

static const VMStateDescription vmstate_cpu_common_crash_occurred = {
    .name = "cpu_common/crash_occurred",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_crash_occurred_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(crash_occurred, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        &vmstate_cpu_common_crash_occurred,
        NULL
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
{
    CPUAddressSpace *newas;

    /* Target code should have set num_ases before calling us */
    assert(asidx < cpu->num_ases);

    if (asidx == 0) {
        /* address space 0 gets the convenience alias */
        cpu->as = as;
    }

    /* KVM cannot currently support multiple address spaces. */
    assert(asidx == 0 || !kvm_enabled());

    if (!cpu->cpu_ases) {
        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
    }

    newas = &cpu->cpu_ases[asidx];
    newas->cpu = cpu;
    newas->as = as;
    if (tcg_enabled()) {
        newas->tcg_as_listener.commit = tcg_commit;
        memory_listener_register(&newas->tcg_as_listener, as);
    }
}

AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
{
    /* Return the AddressSpace corresponding to the specified index */
    return cpu->cpu_ases[asidx].as;
}
#endif

void cpu_exec_unrealizefn(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    cpu_list_remove(cpu);

    if (cc->vmsd != NULL) {
        vmstate_unregister(NULL, cc->vmsd, cpu);
    }
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
    }
}

void cpu_exec_initfn(CPUState *cpu)
{
    cpu->as = NULL;
    cpu->num_ases = 0;

#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();

    /* This is a softmmu CPU object, so create a property for it
     * so users can wire up its memory. (This can't go in qom/cpu.c
     * because that file is compiled only once for both user-mode
     * and system builds.) The default if no link is set up is to use
     * the system address space.
     */
    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
                             (Object **)&cpu->memory,
                             qdev_prop_allow_set_link_before_realize,
                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
                             &error_abort);
    cpu->memory = system_memory;
    object_ref(OBJECT(cpu->memory));
#endif
}

void cpu_exec_realizefn(CPUState *cpu, Error **errp)
{
    CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);

    cpu_list_add(cpu);

#ifndef CONFIG_USER_ONLY
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
    }
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
    }
#endif
}

static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    /* Flush the whole TB as this will not have race conditions
     * even if we don't have proper locking yet.
     * Ideally we would just invalidate the TBs for the
     * specified PC.
     */
    tb_flush(cpu);
}

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
}

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    CPUWatchpoint *wp;

    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len = len;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
}

/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}

#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(cpu);
        }
    }
}

void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_separate()) {
        qemu_log_lock();
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_unlock();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
    replay_finish();
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    rcu_read_lock();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    CPU_FOREACH(cpu) {
        tlb_reset_dirty(cpu, start1, length);
    }
    rcu_read_unlock();
}

/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    bool dirty = false;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    while (page < end) {
        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);

        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
                                              offset, num);
        page += num;
    }

    rcu_read_unlock();

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return dirty;
}
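
/* The iotlb value returned below is either a ram_addr ORed with one of the
 * special NOTDIRTY/ROM section numbers, or, for MMIO, an index into the
 * dispatch section table plus the offset of the access within the region.
 */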
/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        AddressSpaceDispatch *d;

        d = atomic_rcu_read(&section->address_space->dispatch);
        iotlb = section - d->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                               qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    bool have_sub_page = mr->subpage;

    memory_region_unref(mr);

    if (have_sub_page) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        object_unref(OBJECT(&subpage->iomem));
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}

static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#ifdef __linux__
static int64_t get_file_size(int fd)
{
    int64_t size = lseek(fd, 0, SEEK_END);
    if (size < 0) {
        return -errno;
    }
    return size;
}
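
/* Back a RAMBlock with a file under @path: use the file itself if it
 * exists, create it if it does not, or create a uniquely named file
 * inside it if @path is a directory, then mmap it into the block.
 */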
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path,
                            Error **errp)
{
    bool unlink_on_error = false;
    char *filename;
    char *sanitized_name;
    char *c;
    void *area = MAP_FAILED;
    int fd = -1;
    int64_t file_size;

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        return NULL;
    }

    for (;;) {
        fd = open(path, O_RDWR);
        if (fd >= 0) {
            /* @path names an existing file, use it */
            break;
        }
        if (errno == ENOENT) {
            /* @path names a file that doesn't exist, create it */
            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
            if (fd >= 0) {
                unlink_on_error = true;
                break;
            }
        } else if (errno == EISDIR) {
            /* @path names a directory, create a file there */
            /* Make name safe to use with mkstemp by replacing '/' with '_'. */
            sanitized_name = g_strdup(memory_region_name(block->mr));
            for (c = sanitized_name; *c != '\0'; c++) {
                if (*c == '/') {
                    *c = '_';
                }
            }

            filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                                       sanitized_name);
            g_free(sanitized_name);

            fd = mkstemp(filename);
            if (fd >= 0) {
                unlink(filename);
                g_free(filename);
                break;
            }
            g_free(filename);
        }
        if (errno != EEXIST && errno != EINTR) {
            error_setg_errno(errp, errno,
                             "can't open backing store %s for guest RAM",
                             path);
            goto error;
        }
        /*
         * Try again on EINTR and EEXIST.  The latter happens when
         * something else creates the file between our two open().
         */
    }

    block->page_size = qemu_fd_getpagesize(fd);
    block->mr->align = block->page_size;
#if defined(__s390x__)
    if (kvm_enabled()) {
        block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
    }
#endif

    file_size = get_file_size(fd);

    if (memory < block->page_size) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than page size 0x%zx",
                   memory, block->page_size);
        goto error;
    }

    if (file_size > 0 && file_size < memory) {
        error_setg(errp, "backing store %s size 0x%" PRIx64
                   " does not match 'size' option 0x" RAM_ADDR_FMT,
                   path, file_size, memory);
        goto error;
    }

    memory = ROUND_UP(memory, block->page_size);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     *
     * Do not truncate the non-empty backend file to avoid corrupting
     * the existing data in the file. Disabling shrinking is not
     * enough. For example, the current vNVDIMM implementation stores
     * the guest NVDIMM labels at the end of the backend file. If the
     * backend file is later extended, QEMU will not be able to find
     * those labels. Therefore, extending the non-empty backend file
     * is disabled as well.
     */
    if (!file_size && ftruncate(fd, memory)) {
        perror("ftruncate");
    }

    area = qemu_ram_mmap(fd, memory, block->mr->align,
                         block->flags & RAM_SHARED);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for guest RAM");
        goto error;
    }

    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory, errp);
        if (errp && *errp) {
            goto error;
        }
    }

    block->fd = fd;
    return area;

error:
    if (area != MAP_FAILED) {
        qemu_ram_munmap(area, memory);
    }
    if (unlink_on_error) {
        unlink(path);
    }
    if (fd != -1) {
        close(fd);
    }
    return NULL;
}
#endif
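
/* Best-fit search over the gaps between existing RAMBlocks: pick the
 * smallest gap that still fits the requested size.
 */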
/* Called with the ramlist lock held.  */
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
        return 0;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->max_length;

        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!machine_dump_guest_core(current_machine)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

const char *qemu_ram_get_idstr(RAMBlock *rb)
{
    return rb->idstr;
}

/* Called with iothread lock held.  */
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
{
    RAMBlock *block;

    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block != new_block &&
            !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    rcu_read_unlock();
}

/* Called with iothread lock held.  */
void qemu_ram_unset_idstr(RAMBlock *block)
{
    /* FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */
    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
}

size_t qemu_ram_pagesize(RAMBlock *rb)
{
    return rb->page_size;
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!machine_mem_merge(current_machine)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

/* Only legal before guest might have detected the memory size: e.g. on
 * incoming migration, or right after reset.
 *
 * As the memory core doesn't know how the memory is accessed, it is up to
 * the resize callback to update device state and/or add assertions to detect
 * misuse, if necessary.
 */
int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
{
    assert(block);

    newsize = HOST_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        return 0;
    }

    if (!(block->flags & RAM_RESIZEABLE)) {
        error_setg_errno(errp, EINVAL,
                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
                         " in != 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->used_length);
        return -EINVAL;
    }

    if (block->max_length < newsize) {
        error_setg_errno(errp, EINVAL,
                         "Length too large: %s: 0x" RAM_ADDR_FMT
                         " > 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->max_length);
        return -EINVAL;
    }

    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
    block->used_length = newsize;
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                        DIRTY_CLIENTS_ALL);
    memory_region_set_size(block->mr, newsize);
    if (block->resized) {
        block->resized(block->idstr, newsize, block->host);
    }
    return 0;
}

/* Called with ram_list.mutex held */
static void dirty_memory_extend(ram_addr_t old_ram_size,
                                ram_addr_t new_ram_size)
{
    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    int i;

    /* Only need to extend if block count increased */
    if (new_num_blocks <= old_num_blocks) {
        return;
    }

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        DirtyMemoryBlocks *old_blocks;
        DirtyMemoryBlocks *new_blocks;
        int j;

        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
        new_blocks = g_malloc(sizeof(*new_blocks) +
                              sizeof(new_blocks->blocks[0]) * new_num_blocks);

        if (old_num_blocks) {
            memcpy(new_blocks->blocks, old_blocks->blocks,
                   old_num_blocks * sizeof(old_blocks->blocks[0]));
        }

        for (j = old_num_blocks; j < new_num_blocks; j++) {
            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
        }

        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);

        if (old_blocks) {
            g_free_rcu(old_blocks, rcu);
        }
    }
}
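
/* Insert a new RAMBlock into ram_list: assign it an offset in the ram
 * address space, allocate its host memory unless the caller already did
 * (Xen goes through xen_ram_alloc() instead), grow the dirty-memory
 * bitmaps if needed, and keep the list sorted from biggest to smallest.
 */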
static void ram_block_add(RAMBlock *new_block, Error **errp)
{
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    ram_addr_t old_ram_size, new_ram_size;
    Error *err = NULL;

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr, &err);
            if (err) {
                error_propagate(errp, err);
                qemu_mutex_unlock_ramlist();
                return;
            }
        } else {
            new_block->host = phys_mem_alloc(new_block->max_length,
                                             &new_block->mr->align);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return;
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    new_ram_size = MAX(old_ram_size,
              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        migration_bitmap_extend(old_ram_size, new_ram_size);
        dirty_memory_extend(old_ram_size, new_ram_size);
    }
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);

    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
    }
}

#ifdef __linux__
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   bool share, const char *mem_path,
                                   Error **errp)
{
    RAMBlock *new_block;
    Error *local_err = NULL;

    if (xen_enabled()) {
        error_setg(errp, "-mem-path not supported with Xen");
        return NULL;
    }

    if (phys_mem_alloc != qemu_anon_ram_alloc) {
        /*
         * file_ram_alloc() needs to allocate just like
         * phys_mem_alloc, but we haven't bothered to provide
         * a hook there.
         */
        error_setg(errp,
                   "-mem-path not supported with this accelerator");
        return NULL;
    }

    size = HOST_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->used_length = size;
    new_block->max_length = size;
    new_block->flags = share ? RAM_SHARED : 0;
    new_block->host = file_ram_alloc(new_block, size,
                                     mem_path, errp);
    if (!new_block->host) {
        g_free(new_block);
        return NULL;
    }

    ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return NULL;
    }
    return new_block;
}
#endif

static
RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                  void (*resized)(const char*,
                                                  uint64_t length,
                                                  void *host),
                                  void *host, bool resizeable,
                                  MemoryRegion *mr, Error **errp)
{
    RAMBlock *new_block;
    Error *local_err = NULL;

    size = HOST_PAGE_ALIGN(size);
    max_size = HOST_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->resized = resized;
    new_block->used_length = size;
    new_block->max_length = max_size;
    assert(max_size >= size);
    new_block->fd = -1;
    new_block->page_size = getpagesize();
    new_block->host = host;
    if (host) {
        new_block->flags |= RAM_PREALLOC;
    }
    if (resizeable) {
        new_block->flags |= RAM_RESIZEABLE;
    }
    ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return NULL;
    }
    return new_block;
}

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}

RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}

RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
}

static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        qemu_ram_munmap(block->host, block->max_length);
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}

void qemu_ram_free(RAMBlock *block)
{
    if (!block) {
        return;
    }

    qemu_mutex_lock_ramlist();
    QLIST_REMOVE_RCU(block, next);
    ram_list.mru_block = NULL;
    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    call_rcu(block, reclaim_ramblock, rcu);
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
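/* Re-create the host mapping for [addr, addr + length) in place, using the
 * same kind of mapping (file-backed or anonymous) the block was originally
 * given.
 */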
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                if (block->fd >= 0) {
                    flags |= (block->flags & RAM_SHARED ?
                              MAP_SHARED : MAP_PRIVATE);
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
        }
    }
}
#endif /* !_WIN32 */

/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * Called within RCU critical section.
 */
void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
{
    RAMBlock *block = ram_block;

    if (block == NULL) {
        block = qemu_get_ram_block(addr);
        addr -= block->offset;
    }

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }
    return ramblock_ptr(block, addr);
}

/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
 * but takes a size argument.
 *
 * Called within RCU critical section.
 */
static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
                                 hwaddr *size)
{
    RAMBlock *block = ram_block;
    if (*size == 0) {
        return NULL;
    }

    if (block == NULL) {
        block = qemu_get_ram_block(addr);
        addr -= block->offset;
    }
    *size = MIN(*size, block->max_length - addr);

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map the requested area.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, *size, 1);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }

    return ramblock_ptr(block, addr);
}

/*
 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
 * in that RAMBlock.
 *
 * ptr: Host pointer to look up
 * round_offset: If true round the result offset down to a page boundary
 * *ram_addr: set to result ram_addr
 * *offset: set to result offset within the RAMBlock
 *
 * Returns: RAMBlock (or NULL if not found)
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                   ram_addr_t *offset)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        ram_addr_t ram_addr;
        rcu_read_lock();
        ram_addr = xen_ram_addr_from_mapcache(ptr);
        block = qemu_get_ram_block(ram_addr);
        if (block) {
            *offset = ram_addr - block->offset;
        }
        rcu_read_unlock();
        return block;
    }

    rcu_read_lock();
    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && block->host && host - block->host < block->max_length) {
        goto found;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        /* This case happens when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->max_length) {
            goto found;
        }
    }

    rcu_read_unlock();
    return NULL;

found:
    *offset = (host - block->host);
    if (round_offset) {
        *offset &= TARGET_PAGE_MASK;
    }
    rcu_read_unlock();
    return block;
}

/*
 * Finds the named RAMBlock
 *
 * name: The name of RAMBlock to find
 *
 * Returns: RAMBlock (or NULL if not found)
 */
RAMBlock *qemu_ram_block_by_name(const char *name)
{
    RAMBlock *block;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (!strcmp(name, block->idstr)) {
            return block;
        }
    }

    return NULL;
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
ram_addr_t qemu_ram_addr_from_host(void *ptr)
{
    RAMBlock *block;
    ram_addr_t offset;

    block = qemu_ram_block_from_host(ptr, false, &offset);
    if (!block) {
        return RAM_ADDR_INVALID;
    }

    return block->offset + offset;
}
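
/* Writes to pages whose dirty bits are not all set are routed through the
 * notdirty region: invalidate any translated code for the page, perform
 * the store on the underlying RAM, then mark the page dirty so later
 * writes can take the fast path again.
 */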
/* Called within RCU critical section.  */
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    bool locked = false;

    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        locked = true;
        tb_lock();
        tb_invalidate_phys_page_fast(ram_addr, size);
    }
    switch (size) {
    case 1:
        stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
        break;
    case 2:
        stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
        break;
    case 4:
        stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
        break;
    default:
        abort();
    }

    if (locked) {
        tb_unlock();
    }

    /* Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size,
                                        DIRTY_CLIENTS_NOCODE);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
    }
}

static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
{
    CPUState *cpu = current_cpu;
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    uint32_t cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            wp->hitattrs = attrs;
            if (!cpu->watchpoint_hit) {
                if (wp->flags & BP_CPU &&
                    !cc->debug_check_watchpoint(cpu, wp)) {
                    wp->flags &= ~BP_WATCHPOINT_HIT;
                    continue;
                }
                cpu->watchpoint_hit = wp;

                /* The tb_lock will be reset when cpu_loop_exit or
                 * cpu_loop_exit_noexc longjmp back into the cpu_exec
                 * main loop.
                 */
                tb_lock();
                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_loop_exit_noexc(cpu);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
2119 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2120 so these check for a hit then pass through to the normal out-of-line
2121 phys routines. */
2122 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2123 unsigned size, MemTxAttrs attrs)
2125 MemTxResult res;
2126 uint64_t data;
2127 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2128 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2130 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2131 switch (size) {
2132 case 1:
2133 data = address_space_ldub(as, addr, attrs, &res);
2134 break;
2135 case 2:
2136 data = address_space_lduw(as, addr, attrs, &res);
2137 break;
2138 case 4:
2139 data = address_space_ldl(as, addr, attrs, &res);
2140 break;
2141 default: abort();
2143 *pdata = data;
2144 return res;
2147 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2148 uint64_t val, unsigned size,
2149 MemTxAttrs attrs)
2151 MemTxResult res;
2152 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2153 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2155 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2156 switch (size) {
2157 case 1:
2158 address_space_stb(as, addr, val, attrs, &res);
2159 break;
2160 case 2:
2161 address_space_stw(as, addr, val, attrs, &res);
2162 break;
2163 case 4:
2164 address_space_stl(as, addr, val, attrs, &res);
2165 break;
2166 default: abort();
2168 return res;
2171 static const MemoryRegionOps watch_mem_ops = {
2172 .read_with_attrs = watch_mem_read,
2173 .write_with_attrs = watch_mem_write,
2174 .endianness = DEVICE_NATIVE_ENDIAN,
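/*
 * Minimal illustrative sketch of how a client (e.g. the gdbstub) might arm
 * the watchpoint machinery above.  It assumes the cpu_watchpoint_insert()
 * and cpu_watchpoint_remove_by_ref() helpers declared in qom/cpu.h; the
 * address, length and flag combination are made-up example values.
 */
#if 0
static void example_arm_write_watchpoint(CPUState *cpu)
{
    CPUWatchpoint *wp = NULL;

    /* Trap 4-byte guest writes at virtual address 0x1000.  The TLB entry
     * for that page is redirected to io_mem_watch, so every access funnels
     * through watch_mem_read()/watch_mem_write() above. */
    if (cpu_watchpoint_insert(cpu, 0x1000, 4,
                              BP_MEM_WRITE | BP_GDB, &wp) < 0) {
        return;
    }

    /* ... later, when the client no longer cares ... */
    cpu_watchpoint_remove_by_ref(cpu, wp);
}
#endif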
2177 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2178 unsigned len, MemTxAttrs attrs)
2180 subpage_t *subpage = opaque;
2181 uint8_t buf[8];
2182 MemTxResult res;
2184 #if defined(DEBUG_SUBPAGE)
2185 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2186 subpage, len, addr);
2187 #endif
2188 res = address_space_read(subpage->as, addr + subpage->base,
2189 attrs, buf, len);
2190 if (res) {
2191 return res;
2193 switch (len) {
2194 case 1:
2195 *data = ldub_p(buf);
2196 return MEMTX_OK;
2197 case 2:
2198 *data = lduw_p(buf);
2199 return MEMTX_OK;
2200 case 4:
2201 *data = ldl_p(buf);
2202 return MEMTX_OK;
2203 case 8:
2204 *data = ldq_p(buf);
2205 return MEMTX_OK;
2206 default:
2207 abort();
2211 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2212 uint64_t value, unsigned len, MemTxAttrs attrs)
2214 subpage_t *subpage = opaque;
2215 uint8_t buf[8];
2217 #if defined(DEBUG_SUBPAGE)
2218 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2219 " value %"PRIx64"\n",
2220 __func__, subpage, len, addr, value);
2221 #endif
2222 switch (len) {
2223 case 1:
2224 stb_p(buf, value);
2225 break;
2226 case 2:
2227 stw_p(buf, value);
2228 break;
2229 case 4:
2230 stl_p(buf, value);
2231 break;
2232 case 8:
2233 stq_p(buf, value);
2234 break;
2235 default:
2236 abort();
2238 return address_space_write(subpage->as, addr + subpage->base,
2239 attrs, buf, len);
2242 static bool subpage_accepts(void *opaque, hwaddr addr,
2243 unsigned len, bool is_write)
2245 subpage_t *subpage = opaque;
2246 #if defined(DEBUG_SUBPAGE)
2247 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2248 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2249 #endif
2251 return address_space_access_valid(subpage->as, addr + subpage->base,
2252 len, is_write);
2255 static const MemoryRegionOps subpage_ops = {
2256 .read_with_attrs = subpage_read,
2257 .write_with_attrs = subpage_write,
2258 .impl.min_access_size = 1,
2259 .impl.max_access_size = 8,
2260 .valid.min_access_size = 1,
2261 .valid.max_access_size = 8,
2262 .valid.accepts = subpage_accepts,
2263 .endianness = DEVICE_NATIVE_ENDIAN,
2266 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2267 uint16_t section)
2269 int idx, eidx;
2271 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2272 return -1;
2273 idx = SUBPAGE_IDX(start);
2274 eidx = SUBPAGE_IDX(end);
2275 #if defined(DEBUG_SUBPAGE)
2276 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2277 __func__, mmio, start, end, idx, eidx, section);
2278 #endif
2279 for (; idx <= eidx; idx++) {
2280 mmio->sub_section[idx] = section;
2283 return 0;
2286 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2288 subpage_t *mmio;
2290 mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2291 mmio->as = as;
2292 mmio->base = base;
2293 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2294 NULL, TARGET_PAGE_SIZE);
2295 mmio->iomem.subpage = true;
2296 #if defined(DEBUG_SUBPAGE)
2297 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2298 mmio, base, TARGET_PAGE_SIZE);
2299 #endif
2300 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2302 return mmio;
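/*
 * Minimal illustrative sketch of how the dispatch-building code earlier in
 * this file ties the two helpers above together.  The parameter names and
 * the 0x100..0x1ff window are placeholders chosen for the example.
 */
#if 0
static void example_register_subpage(AddressSpaceDispatch *d,
                                     MemoryRegionSection *section,
                                     hwaddr page_base)
{
    /* One subpage_t covers a whole target page; every slot starts out
     * resolving to PHYS_SECTION_UNASSIGNED. */
    subpage_t *subpage = subpage_init(d->as, page_base);

    /* Route bytes 0x100..0x1ff of that page to `section`; the rest of the
     * page keeps hitting the unassigned section. */
    subpage_register(subpage, 0x100, 0x1ff,
                     phys_section_add(&d->map, section));
}
#endif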
2305 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2306 MemoryRegion *mr)
2308 assert(as);
2309 MemoryRegionSection section = {
2310 .address_space = as,
2311 .mr = mr,
2312 .offset_within_address_space = 0,
2313 .offset_within_region = 0,
2314 .size = int128_2_64(),
2317 return phys_section_add(map, &section);
2320 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2322 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2323 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2324 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2325 MemoryRegionSection *sections = d->map.sections;
2327 return sections[index & ~TARGET_PAGE_MASK].mr;
2330 static void io_mem_init(void)
2332 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2333 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2334 NULL, UINT64_MAX);
2335 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2336 NULL, UINT64_MAX);
2337 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2338 NULL, UINT64_MAX);
2341 static void mem_begin(MemoryListener *listener)
2343 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2344 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2345 uint16_t n;
2347 n = dummy_section(&d->map, as, &io_mem_unassigned);
2348 assert(n == PHYS_SECTION_UNASSIGNED);
2349 n = dummy_section(&d->map, as, &io_mem_notdirty);
2350 assert(n == PHYS_SECTION_NOTDIRTY);
2351 n = dummy_section(&d->map, as, &io_mem_rom);
2352 assert(n == PHYS_SECTION_ROM);
2353 n = dummy_section(&d->map, as, &io_mem_watch);
2354 assert(n == PHYS_SECTION_WATCH);
2356 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2357 d->as = as;
2358 as->next_dispatch = d;
2361 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2363 phys_sections_free(&d->map);
2364 g_free(d);
2367 static void mem_commit(MemoryListener *listener)
2369 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2370 AddressSpaceDispatch *cur = as->dispatch;
2371 AddressSpaceDispatch *next = as->next_dispatch;
2373 phys_page_compact_all(next, next->map.nodes_nb);
2375 atomic_rcu_set(&as->dispatch, next);
2376 if (cur) {
2377 call_rcu(cur, address_space_dispatch_free, rcu);
2381 static void tcg_commit(MemoryListener *listener)
2383 CPUAddressSpace *cpuas;
2384 AddressSpaceDispatch *d;
2386 /* since each CPU stores ram addresses in its TLB cache, we must
2387 reset the modified entries */
2388 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2389 cpu_reloading_memory_map();
2390 /* The CPU and TLB are protected by the iothread lock.
2391 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2392 * may have split the RCU critical section.
2394 d = atomic_rcu_read(&cpuas->as->dispatch);
2395 atomic_rcu_set(&cpuas->memory_dispatch, d);
2396 tlb_flush(cpuas->cpu, 1);
2399 void address_space_init_dispatch(AddressSpace *as)
2401 as->dispatch = NULL;
2402 as->dispatch_listener = (MemoryListener) {
2403 .begin = mem_begin,
2404 .commit = mem_commit,
2405 .region_add = mem_add,
2406 .region_nop = mem_add,
2407 .priority = 0,
2409 memory_listener_register(&as->dispatch_listener, as);
2412 void address_space_unregister(AddressSpace *as)
2414 memory_listener_unregister(&as->dispatch_listener);
2417 void address_space_destroy_dispatch(AddressSpace *as)
2419 AddressSpaceDispatch *d = as->dispatch;
2421 atomic_rcu_set(&as->dispatch, NULL);
2422 if (d) {
2423 call_rcu(d, address_space_dispatch_free, rcu);
2427 static void memory_map_init(void)
2429 system_memory = g_malloc(sizeof(*system_memory));
2431 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2432 address_space_init(&address_space_memory, system_memory, "memory");
2434 system_io = g_malloc(sizeof(*system_io));
2435 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2436 65536);
2437 address_space_init(&address_space_io, system_io, "I/O");
2440 MemoryRegion *get_system_memory(void)
2442 return system_memory;
2445 MemoryRegion *get_system_io(void)
2447 return system_io;
2450 #endif /* !defined(CONFIG_USER_ONLY) */
2452 /* physical memory access (slow version, mainly for debug) */
2453 #if defined(CONFIG_USER_ONLY)
2454 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2455 uint8_t *buf, int len, int is_write)
2457 int l, flags;
2458 target_ulong page;
2459 void * p;
2461 while (len > 0) {
2462 page = addr & TARGET_PAGE_MASK;
2463 l = (page + TARGET_PAGE_SIZE) - addr;
2464 if (l > len)
2465 l = len;
2466 flags = page_get_flags(page);
2467 if (!(flags & PAGE_VALID))
2468 return -1;
2469 if (is_write) {
2470 if (!(flags & PAGE_WRITE))
2471 return -1;
2472 /* XXX: this code should not depend on lock_user */
2473 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2474 return -1;
2475 memcpy(p, buf, l);
2476 unlock_user(p, addr, l);
2477 } else {
2478 if (!(flags & PAGE_READ))
2479 return -1;
2480 /* XXX: this code should not depend on lock_user */
2481 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2482 return -1;
2483 memcpy(buf, p, l);
2484 unlock_user(p, addr, 0);
2486 len -= l;
2487 buf += l;
2488 addr += l;
2490 return 0;
2493 #else
2495 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2496 hwaddr length)
2498 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2499 addr += memory_region_get_ram_addr(mr);
2501 /* No early return if dirty_log_mask is or becomes 0, because
2502 * cpu_physical_memory_set_dirty_range will still call
2503 * xen_modified_memory.
2505 if (dirty_log_mask) {
2506 dirty_log_mask =
2507 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2509 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2510 tb_lock();
2511 tb_invalidate_phys_range(addr, addr + length);
2512 tb_unlock();
2513 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2515 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2518 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2520 unsigned access_size_max = mr->ops->valid.max_access_size;
2522 /* Regions are assumed to support 1-4 byte accesses unless
2523 otherwise specified. */
2524 if (access_size_max == 0) {
2525 access_size_max = 4;
2528 /* Bound the maximum access by the alignment of the address. */
2529 if (!mr->ops->impl.unaligned) {
2530 unsigned align_size_max = addr & -addr;
2531 if (align_size_max != 0 && align_size_max < access_size_max) {
2532 access_size_max = align_size_max;
2536 /* Don't attempt accesses larger than the maximum. */
2537 if (l > access_size_max) {
2538 l = access_size_max;
2540 l = pow2floor(l);
2542 return l;
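/*
 * Worked example (illustrative only) of the clamping performed by
 * memory_access_size() above; the region properties and numbers are made up.
 */
#if 0
static void example_memory_access_size(MemoryRegion *mr)
{
    /* Suppose mr->ops->valid.max_access_size == 4 and the region does not
     * allow unaligned accesses.  An 8-byte request at offset 0x1006 is
     * first capped to 4 (the region maximum), then to 2 because
     * 0x1006 & -0x1006 == 2, and pow2floor() leaves it at 2.  The caller
     * issues a 2-byte access and loops for the remainder. */
    unsigned l = memory_access_size(mr, 8, 0x1006);
    assert(l == 2);
}
#endif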
2545 static bool prepare_mmio_access(MemoryRegion *mr)
2547 bool unlocked = !qemu_mutex_iothread_locked();
2548 bool release_lock = false;
2550 if (unlocked && mr->global_locking) {
2551 qemu_mutex_lock_iothread();
2552 unlocked = false;
2553 release_lock = true;
2555 if (mr->flush_coalesced_mmio) {
2556 if (unlocked) {
2557 qemu_mutex_lock_iothread();
2559 qemu_flush_coalesced_mmio_buffer();
2560 if (unlocked) {
2561 qemu_mutex_unlock_iothread();
2565 return release_lock;
2568 /* Called within RCU critical section. */
2569 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2570 MemTxAttrs attrs,
2571 const uint8_t *buf,
2572 int len, hwaddr addr1,
2573 hwaddr l, MemoryRegion *mr)
2575 uint8_t *ptr;
2576 uint64_t val;
2577 MemTxResult result = MEMTX_OK;
2578 bool release_lock = false;
2580 for (;;) {
2581 if (!memory_access_is_direct(mr, true)) {
2582 release_lock |= prepare_mmio_access(mr);
2583 l = memory_access_size(mr, l, addr1);
2584 /* XXX: could force current_cpu to NULL to avoid
2585 potential bugs */
2586 switch (l) {
2587 case 8:
2588 /* 64 bit write access */
2589 val = ldq_p(buf);
2590 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2591 attrs);
2592 break;
2593 case 4:
2594 /* 32 bit write access */
2595 val = ldl_p(buf);
2596 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2597 attrs);
2598 break;
2599 case 2:
2600 /* 16 bit write access */
2601 val = lduw_p(buf);
2602 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2603 attrs);
2604 break;
2605 case 1:
2606 /* 8 bit write access */
2607 val = ldub_p(buf);
2608 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2609 attrs);
2610 break;
2611 default:
2612 abort();
2614 } else {
2615 /* RAM case */
2616 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2617 memcpy(ptr, buf, l);
2618 invalidate_and_set_dirty(mr, addr1, l);
2621 if (release_lock) {
2622 qemu_mutex_unlock_iothread();
2623 release_lock = false;
2626 len -= l;
2627 buf += l;
2628 addr += l;
2630 if (!len) {
2631 break;
2634 l = len;
2635 mr = address_space_translate(as, addr, &addr1, &l, true);
2638 return result;
2641 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2642 const uint8_t *buf, int len)
2644 hwaddr l;
2645 hwaddr addr1;
2646 MemoryRegion *mr;
2647 MemTxResult result = MEMTX_OK;
2649 if (len > 0) {
2650 rcu_read_lock();
2651 l = len;
2652 mr = address_space_translate(as, addr, &addr1, &l, true);
2653 result = address_space_write_continue(as, addr, attrs, buf, len,
2654 addr1, l, mr);
2655 rcu_read_unlock();
2658 return result;
2661 /* Called within RCU critical section. */
2662 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2663 MemTxAttrs attrs, uint8_t *buf,
2664 int len, hwaddr addr1, hwaddr l,
2665 MemoryRegion *mr)
2667 uint8_t *ptr;
2668 uint64_t val;
2669 MemTxResult result = MEMTX_OK;
2670 bool release_lock = false;
2672 for (;;) {
2673 if (!memory_access_is_direct(mr, false)) {
2674 /* I/O case */
2675 release_lock |= prepare_mmio_access(mr);
2676 l = memory_access_size(mr, l, addr1);
2677 switch (l) {
2678 case 8:
2679 /* 64 bit read access */
2680 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2681 attrs);
2682 stq_p(buf, val);
2683 break;
2684 case 4:
2685 /* 32 bit read access */
2686 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2687 attrs);
2688 stl_p(buf, val);
2689 break;
2690 case 2:
2691 /* 16 bit read access */
2692 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2693 attrs);
2694 stw_p(buf, val);
2695 break;
2696 case 1:
2697 /* 8 bit read access */
2698 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2699 attrs);
2700 stb_p(buf, val);
2701 break;
2702 default:
2703 abort();
2705 } else {
2706 /* RAM case */
2707 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2708 memcpy(buf, ptr, l);
2711 if (release_lock) {
2712 qemu_mutex_unlock_iothread();
2713 release_lock = false;
2716 len -= l;
2717 buf += l;
2718 addr += l;
2720 if (!len) {
2721 break;
2724 l = len;
2725 mr = address_space_translate(as, addr, &addr1, &l, false);
2728 return result;
2731 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2732 MemTxAttrs attrs, uint8_t *buf, int len)
2734 hwaddr l;
2735 hwaddr addr1;
2736 MemoryRegion *mr;
2737 MemTxResult result = MEMTX_OK;
2739 if (len > 0) {
2740 rcu_read_lock();
2741 l = len;
2742 mr = address_space_translate(as, addr, &addr1, &l, false);
2743 result = address_space_read_continue(as, addr, attrs, buf, len,
2744 addr1, l, mr);
2745 rcu_read_unlock();
2748 return result;
2751 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2752 uint8_t *buf, int len, bool is_write)
2754 if (is_write) {
2755 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2756 } else {
2757 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2761 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2762 int len, int is_write)
2764 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2765 buf, len, is_write);
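/*
 * Minimal illustrative sketch of how a device or board model typically uses
 * the APIs above to copy a buffer into guest memory and read it back.  The
 * guest address and payload are placeholder values.
 */
#if 0
static bool example_copy_to_guest(AddressSpace *as)
{
    uint8_t out[16] = { 0xde, 0xad, 0xbe, 0xef };
    uint8_t in[16];
    MemTxResult res;

    /* Slow-path copy: translation, MMIO dispatch and dirty tracking are
     * handled internally; the return value reports bus errors. */
    res = address_space_write(as, 0x40000000, MEMTXATTRS_UNSPECIFIED,
                              out, sizeof(out));
    res |= address_space_read(as, 0x40000000, MEMTXATTRS_UNSPECIFIED,
                              in, sizeof(in));

    /* For the common "system memory, don't care about errors" case the
     * cpu_physical_memory_rw() wrapper above can be used instead. */
    return res == MEMTX_OK && memcmp(in, out, sizeof(out)) == 0;
}
#endif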
2768 enum write_rom_type {
2769 WRITE_DATA,
2770 FLUSH_CACHE,
2773 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2774 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2776 hwaddr l;
2777 uint8_t *ptr;
2778 hwaddr addr1;
2779 MemoryRegion *mr;
2781 rcu_read_lock();
2782 while (len > 0) {
2783 l = len;
2784 mr = address_space_translate(as, addr, &addr1, &l, true);
2786 if (!(memory_region_is_ram(mr) ||
2787 memory_region_is_romd(mr))) {
2788 l = memory_access_size(mr, l, addr1);
2789 } else {
2790 /* ROM/RAM case */
2791 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2792 switch (type) {
2793 case WRITE_DATA:
2794 memcpy(ptr, buf, l);
2795 invalidate_and_set_dirty(mr, addr1, l);
2796 break;
2797 case FLUSH_CACHE:
2798 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2799 break;
2802 len -= l;
2803 buf += l;
2804 addr += l;
2806 rcu_read_unlock();
2809 /* used for ROM loading: can write in RAM and ROM */
2810 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2811 const uint8_t *buf, int len)
2813 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
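/*
 * Minimal illustrative sketch of ROM loading done by hand.  The address and
 * blob are placeholders; real boards normally go through the rom_add_*() or
 * load_image_targphys() helpers, which eventually end up here.
 */
#if 0
static void example_load_firmware_blob(const uint8_t *blob, int size)
{
    /* Unlike address_space_write(), this also writes into ROM-backed
     * (romd) regions, which is exactly what firmware loading needs. */
    cpu_physical_memory_write_rom(&address_space_memory, 0xfffc0000,
                                  blob, size);

    /* When running under KVM/Xen, keep the host instruction cache coherent
     * with the code that was just deposited. */
    cpu_flush_icache_range(0xfffc0000, size);
}
#endif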
2816 void cpu_flush_icache_range(hwaddr start, int len)
2819 * This function should do the same thing as an icache flush that was
2820 * triggered from within the guest. For TCG we are always cache coherent,
2821 * so there is no need to flush anything. For KVM / Xen we need to flush
2822 * at least the host's instruction cache.
2824 if (tcg_enabled()) {
2825 return;
2828 cpu_physical_memory_write_rom_internal(&address_space_memory,
2829 start, NULL, len, FLUSH_CACHE);
2832 typedef struct {
2833 MemoryRegion *mr;
2834 void *buffer;
2835 hwaddr addr;
2836 hwaddr len;
2837 bool in_use;
2838 } BounceBuffer;
2840 static BounceBuffer bounce;
2842 typedef struct MapClient {
2843 QEMUBH *bh;
2844 QLIST_ENTRY(MapClient) link;
2845 } MapClient;
2847 QemuMutex map_client_list_lock;
2848 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2849 = QLIST_HEAD_INITIALIZER(map_client_list);
2851 static void cpu_unregister_map_client_do(MapClient *client)
2853 QLIST_REMOVE(client, link);
2854 g_free(client);
2857 static void cpu_notify_map_clients_locked(void)
2859 MapClient *client;
2861 while (!QLIST_EMPTY(&map_client_list)) {
2862 client = QLIST_FIRST(&map_client_list);
2863 qemu_bh_schedule(client->bh);
2864 cpu_unregister_map_client_do(client);
2868 void cpu_register_map_client(QEMUBH *bh)
2870 MapClient *client = g_malloc(sizeof(*client));
2872 qemu_mutex_lock(&map_client_list_lock);
2873 client->bh = bh;
2874 QLIST_INSERT_HEAD(&map_client_list, client, link);
2875 if (!atomic_read(&bounce.in_use)) {
2876 cpu_notify_map_clients_locked();
2878 qemu_mutex_unlock(&map_client_list_lock);
2881 void cpu_exec_init_all(void)
2883 qemu_mutex_init(&ram_list.mutex);
2884 /* The data structures we set up here depend on knowing the page size,
2885 * so no more changes can be made after this point.
2886 * In an ideal world, nothing we did before we had finished the
2887 * machine setup would care about the target page size, and we could
2888 * do this much later, rather than requiring board models to state
2889 * up front what their requirements are.
2891 finalize_target_page_bits();
2892 io_mem_init();
2893 memory_map_init();
2894 qemu_mutex_init(&map_client_list_lock);
2897 void cpu_unregister_map_client(QEMUBH *bh)
2899 MapClient *client;
2901 qemu_mutex_lock(&map_client_list_lock);
2902 QLIST_FOREACH(client, &map_client_list, link) {
2903 if (client->bh == bh) {
2904 cpu_unregister_map_client_do(client);
2905 break;
2908 qemu_mutex_unlock(&map_client_list_lock);
2911 static void cpu_notify_map_clients(void)
2913 qemu_mutex_lock(&map_client_list_lock);
2914 cpu_notify_map_clients_locked();
2915 qemu_mutex_unlock(&map_client_list_lock);
2918 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2920 MemoryRegion *mr;
2921 hwaddr l, xlat;
2923 rcu_read_lock();
2924 while (len > 0) {
2925 l = len;
2926 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2927 if (!memory_access_is_direct(mr, is_write)) {
2928 l = memory_access_size(mr, l, addr);
2929 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2930 rcu_read_unlock();
2931 return false;
2935 len -= l;
2936 addr += l;
2938 rcu_read_unlock();
2939 return true;
2942 /* Map a physical memory region into a host virtual address.
2943 * May map a subset of the requested range, given by and returned in *plen.
2944 * May return NULL if resources needed to perform the mapping are exhausted.
2945 * Use only for reads OR writes - not for read-modify-write operations.
2946 * Use cpu_register_map_client() to know when retrying the map operation is
2947 * likely to succeed.
2949 void *address_space_map(AddressSpace *as,
2950 hwaddr addr,
2951 hwaddr *plen,
2952 bool is_write)
2954 hwaddr len = *plen;
2955 hwaddr done = 0;
2956 hwaddr l, xlat, base;
2957 MemoryRegion *mr, *this_mr;
2958 void *ptr;
2960 if (len == 0) {
2961 return NULL;
2964 l = len;
2965 rcu_read_lock();
2966 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2968 if (!memory_access_is_direct(mr, is_write)) {
2969 if (atomic_xchg(&bounce.in_use, true)) {
2970 rcu_read_unlock();
2971 return NULL;
2973 /* Avoid unbounded allocations */
2974 l = MIN(l, TARGET_PAGE_SIZE);
2975 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2976 bounce.addr = addr;
2977 bounce.len = l;
2979 memory_region_ref(mr);
2980 bounce.mr = mr;
2981 if (!is_write) {
2982 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2983 bounce.buffer, l);
2986 rcu_read_unlock();
2987 *plen = l;
2988 return bounce.buffer;
2991 base = xlat;
2993 for (;;) {
2994 len -= l;
2995 addr += l;
2996 done += l;
2997 if (len == 0) {
2998 break;
3001 l = len;
3002 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3003 if (this_mr != mr || xlat != base + done) {
3004 break;
3008 memory_region_ref(mr);
3009 *plen = done;
3010 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
3011 rcu_read_unlock();
3013 return ptr;
3016 /* Unmaps a memory region previously mapped by address_space_map().
3017 * Will also mark the memory as dirty if is_write == 1. access_len gives
3018 * the amount of memory that was actually read or written by the caller.
3020 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3021 int is_write, hwaddr access_len)
3023 if (buffer != bounce.buffer) {
3024 MemoryRegion *mr;
3025 ram_addr_t addr1;
3027 mr = memory_region_from_host(buffer, &addr1);
3028 assert(mr != NULL);
3029 if (is_write) {
3030 invalidate_and_set_dirty(mr, addr1, access_len);
3032 if (xen_enabled()) {
3033 xen_invalidate_map_cache_entry(buffer);
3035 memory_region_unref(mr);
3036 return;
3038 if (is_write) {
3039 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3040 bounce.buffer, access_len);
3042 qemu_vfree(bounce.buffer);
3043 bounce.buffer = NULL;
3044 memory_region_unref(bounce.mr);
3045 atomic_mb_set(&bounce.in_use, false);
3046 cpu_notify_map_clients();
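/*
 * Minimal illustrative sketch of the map/use/unmap pattern described above,
 * as a DMA-style helper might use it.  The fill pattern is a placeholder and
 * error handling is reduced to the bare minimum.
 */
#if 0
static bool example_dma_fill(AddressSpace *as, hwaddr addr, hwaddr len)
{
    hwaddr plen = len;
    void *host = address_space_map(as, addr, &plen, true);

    if (!host) {
        /* Bounce buffer busy or nothing mappable: a real caller would
         * queue a retry with cpu_register_map_client() and bail out. */
        return false;
    }

    /* Only [addr, addr + plen) is guaranteed to be mapped; plen may come
     * back smaller than the requested length. */
    memset(host, 0, plen);

    /* is_write == 1, so the range is marked dirty (and a bounce buffer,
     * if one was used, is written back to guest memory). */
    address_space_unmap(as, host, plen, true, plen);
    return plen == len;
}
#endif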
3049 void *cpu_physical_memory_map(hwaddr addr,
3050 hwaddr *plen,
3051 int is_write)
3053 return address_space_map(&address_space_memory, addr, plen, is_write);
3056 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3057 int is_write, hwaddr access_len)
3059 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3062 /* warning: addr must be aligned */
3063 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3064 MemTxAttrs attrs,
3065 MemTxResult *result,
3066 enum device_endian endian)
3068 uint8_t *ptr;
3069 uint64_t val;
3070 MemoryRegion *mr;
3071 hwaddr l = 4;
3072 hwaddr addr1;
3073 MemTxResult r;
3074 bool release_lock = false;
3076 rcu_read_lock();
3077 mr = address_space_translate(as, addr, &addr1, &l, false);
3078 if (l < 4 || !memory_access_is_direct(mr, false)) {
3079 release_lock |= prepare_mmio_access(mr);
3081 /* I/O case */
3082 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3083 #if defined(TARGET_WORDS_BIGENDIAN)
3084 if (endian == DEVICE_LITTLE_ENDIAN) {
3085 val = bswap32(val);
3087 #else
3088 if (endian == DEVICE_BIG_ENDIAN) {
3089 val = bswap32(val);
3091 #endif
3092 } else {
3093 /* RAM case */
3094 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3095 switch (endian) {
3096 case DEVICE_LITTLE_ENDIAN:
3097 val = ldl_le_p(ptr);
3098 break;
3099 case DEVICE_BIG_ENDIAN:
3100 val = ldl_be_p(ptr);
3101 break;
3102 default:
3103 val = ldl_p(ptr);
3104 break;
3106 r = MEMTX_OK;
3108 if (result) {
3109 *result = r;
3111 if (release_lock) {
3112 qemu_mutex_unlock_iothread();
3114 rcu_read_unlock();
3115 return val;
3118 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3119 MemTxAttrs attrs, MemTxResult *result)
3121 return address_space_ldl_internal(as, addr, attrs, result,
3122 DEVICE_NATIVE_ENDIAN);
3125 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3126 MemTxAttrs attrs, MemTxResult *result)
3128 return address_space_ldl_internal(as, addr, attrs, result,
3129 DEVICE_LITTLE_ENDIAN);
3132 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3133 MemTxAttrs attrs, MemTxResult *result)
3135 return address_space_ldl_internal(as, addr, attrs, result,
3136 DEVICE_BIG_ENDIAN);
3139 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3141 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3144 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3146 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3149 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3151 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
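/*
 * Minimal illustrative sketch: reading a 32-bit little-endian descriptor
 * word from guest memory with the helpers above.  The descriptor layout and
 * the error handling policy are made up for the example.
 */
#if 0
static uint32_t example_read_desc_word(AddressSpace *as, hwaddr desc_addr)
{
    MemTxResult res;
    uint32_t val;

    /* The _le variant byte-swaps as needed, so the result is in host order
     * regardless of TARGET_WORDS_BIGENDIAN. */
    val = address_space_ldl_le(as, desc_addr, MEMTXATTRS_UNSPECIFIED, &res);
    if (res != MEMTX_OK) {
        return 0;
    }
    return val;
}
#endif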
3154 /* warning: addr must be aligned */
3155 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3156 MemTxAttrs attrs,
3157 MemTxResult *result,
3158 enum device_endian endian)
3160 uint8_t *ptr;
3161 uint64_t val;
3162 MemoryRegion *mr;
3163 hwaddr l = 8;
3164 hwaddr addr1;
3165 MemTxResult r;
3166 bool release_lock = false;
3168 rcu_read_lock();
3169 mr = address_space_translate(as, addr, &addr1, &l,
3170 false);
3171 if (l < 8 || !memory_access_is_direct(mr, false)) {
3172 release_lock |= prepare_mmio_access(mr);
3174 /* I/O case */
3175 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3176 #if defined(TARGET_WORDS_BIGENDIAN)
3177 if (endian == DEVICE_LITTLE_ENDIAN) {
3178 val = bswap64(val);
3180 #else
3181 if (endian == DEVICE_BIG_ENDIAN) {
3182 val = bswap64(val);
3184 #endif
3185 } else {
3186 /* RAM case */
3187 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3188 switch (endian) {
3189 case DEVICE_LITTLE_ENDIAN:
3190 val = ldq_le_p(ptr);
3191 break;
3192 case DEVICE_BIG_ENDIAN:
3193 val = ldq_be_p(ptr);
3194 break;
3195 default:
3196 val = ldq_p(ptr);
3197 break;
3199 r = MEMTX_OK;
3201 if (result) {
3202 *result = r;
3204 if (release_lock) {
3205 qemu_mutex_unlock_iothread();
3207 rcu_read_unlock();
3208 return val;
3211 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3212 MemTxAttrs attrs, MemTxResult *result)
3214 return address_space_ldq_internal(as, addr, attrs, result,
3215 DEVICE_NATIVE_ENDIAN);
3218 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3219 MemTxAttrs attrs, MemTxResult *result)
3221 return address_space_ldq_internal(as, addr, attrs, result,
3222 DEVICE_LITTLE_ENDIAN);
3225 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3226 MemTxAttrs attrs, MemTxResult *result)
3228 return address_space_ldq_internal(as, addr, attrs, result,
3229 DEVICE_BIG_ENDIAN);
3232 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3234 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3237 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3239 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3242 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3244 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3247 /* XXX: optimize */
3248 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3249 MemTxAttrs attrs, MemTxResult *result)
3251 uint8_t val;
3252 MemTxResult r;
3254 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3255 if (result) {
3256 *result = r;
3258 return val;
3261 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3263 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3266 /* warning: addr must be aligned */
3267 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3268 hwaddr addr,
3269 MemTxAttrs attrs,
3270 MemTxResult *result,
3271 enum device_endian endian)
3273 uint8_t *ptr;
3274 uint64_t val;
3275 MemoryRegion *mr;
3276 hwaddr l = 2;
3277 hwaddr addr1;
3278 MemTxResult r;
3279 bool release_lock = false;
3281 rcu_read_lock();
3282 mr = address_space_translate(as, addr, &addr1, &l,
3283 false);
3284 if (l < 2 || !memory_access_is_direct(mr, false)) {
3285 release_lock |= prepare_mmio_access(mr);
3287 /* I/O case */
3288 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3289 #if defined(TARGET_WORDS_BIGENDIAN)
3290 if (endian == DEVICE_LITTLE_ENDIAN) {
3291 val = bswap16(val);
3293 #else
3294 if (endian == DEVICE_BIG_ENDIAN) {
3295 val = bswap16(val);
3297 #endif
3298 } else {
3299 /* RAM case */
3300 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3301 switch (endian) {
3302 case DEVICE_LITTLE_ENDIAN:
3303 val = lduw_le_p(ptr);
3304 break;
3305 case DEVICE_BIG_ENDIAN:
3306 val = lduw_be_p(ptr);
3307 break;
3308 default:
3309 val = lduw_p(ptr);
3310 break;
3312 r = MEMTX_OK;
3314 if (result) {
3315 *result = r;
3317 if (release_lock) {
3318 qemu_mutex_unlock_iothread();
3320 rcu_read_unlock();
3321 return val;
3324 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3325 MemTxAttrs attrs, MemTxResult *result)
3327 return address_space_lduw_internal(as, addr, attrs, result,
3328 DEVICE_NATIVE_ENDIAN);
3331 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3332 MemTxAttrs attrs, MemTxResult *result)
3334 return address_space_lduw_internal(as, addr, attrs, result,
3335 DEVICE_LITTLE_ENDIAN);
3338 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3339 MemTxAttrs attrs, MemTxResult *result)
3341 return address_space_lduw_internal(as, addr, attrs, result,
3342 DEVICE_BIG_ENDIAN);
3345 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3347 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3350 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3352 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3355 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3357 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3360 /* warning: addr must be aligned. The ram page is not marked as dirty
3361 and the code inside is not invalidated. It is useful if the dirty
3362 bits are used to track modified PTEs */
3363 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3364 MemTxAttrs attrs, MemTxResult *result)
3366 uint8_t *ptr;
3367 MemoryRegion *mr;
3368 hwaddr l = 4;
3369 hwaddr addr1;
3370 MemTxResult r;
3371 uint8_t dirty_log_mask;
3372 bool release_lock = false;
3374 rcu_read_lock();
3375 mr = address_space_translate(as, addr, &addr1, &l,
3376 true);
3377 if (l < 4 || !memory_access_is_direct(mr, true)) {
3378 release_lock |= prepare_mmio_access(mr);
3380 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3381 } else {
3382 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3383 stl_p(ptr, val);
3385 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3386 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3387 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3388 4, dirty_log_mask);
3389 r = MEMTX_OK;
3391 if (result) {
3392 *result = r;
3394 if (release_lock) {
3395 qemu_mutex_unlock_iothread();
3397 rcu_read_unlock();
3400 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3402 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
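/*
 * Minimal illustrative sketch of the kind of caller the comment above has
 * in mind: a software page-table walker updating a guest PTE uses the
 * _notdirty variant so that its own bookkeeping write does not, by itself,
 * invalidate translated code.  The PTE layout and the bit value below are
 * placeholders, not any particular target's definitions.
 */
#if 0
static void example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr,
                                     uint32_t pte)
{
    stl_phys_notdirty(as, pte_addr, pte | 0x20 /* hypothetical ACCESSED bit */);
}
#endif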
3405 /* warning: addr must be aligned */
3406 static inline void address_space_stl_internal(AddressSpace *as,
3407 hwaddr addr, uint32_t val,
3408 MemTxAttrs attrs,
3409 MemTxResult *result,
3410 enum device_endian endian)
3412 uint8_t *ptr;
3413 MemoryRegion *mr;
3414 hwaddr l = 4;
3415 hwaddr addr1;
3416 MemTxResult r;
3417 bool release_lock = false;
3419 rcu_read_lock();
3420 mr = address_space_translate(as, addr, &addr1, &l,
3421 true);
3422 if (l < 4 || !memory_access_is_direct(mr, true)) {
3423 release_lock |= prepare_mmio_access(mr);
3425 #if defined(TARGET_WORDS_BIGENDIAN)
3426 if (endian == DEVICE_LITTLE_ENDIAN) {
3427 val = bswap32(val);
3429 #else
3430 if (endian == DEVICE_BIG_ENDIAN) {
3431 val = bswap32(val);
3433 #endif
3434 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3435 } else {
3436 /* RAM case */
3437 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3438 switch (endian) {
3439 case DEVICE_LITTLE_ENDIAN:
3440 stl_le_p(ptr, val);
3441 break;
3442 case DEVICE_BIG_ENDIAN:
3443 stl_be_p(ptr, val);
3444 break;
3445 default:
3446 stl_p(ptr, val);
3447 break;
3449 invalidate_and_set_dirty(mr, addr1, 4);
3450 r = MEMTX_OK;
3452 if (result) {
3453 *result = r;
3455 if (release_lock) {
3456 qemu_mutex_unlock_iothread();
3458 rcu_read_unlock();
3461 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3462 MemTxAttrs attrs, MemTxResult *result)
3464 address_space_stl_internal(as, addr, val, attrs, result,
3465 DEVICE_NATIVE_ENDIAN);
3468 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3469 MemTxAttrs attrs, MemTxResult *result)
3471 address_space_stl_internal(as, addr, val, attrs, result,
3472 DEVICE_LITTLE_ENDIAN);
3475 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3476 MemTxAttrs attrs, MemTxResult *result)
3478 address_space_stl_internal(as, addr, val, attrs, result,
3479 DEVICE_BIG_ENDIAN);
3482 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3484 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3487 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3489 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3492 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3494 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3497 /* XXX: optimize */
3498 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3499 MemTxAttrs attrs, MemTxResult *result)
3501 uint8_t v = val;
3502 MemTxResult r;
3504 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3505 if (result) {
3506 *result = r;
3510 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3512 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3515 /* warning: addr must be aligned */
3516 static inline void address_space_stw_internal(AddressSpace *as,
3517 hwaddr addr, uint32_t val,
3518 MemTxAttrs attrs,
3519 MemTxResult *result,
3520 enum device_endian endian)
3522 uint8_t *ptr;
3523 MemoryRegion *mr;
3524 hwaddr l = 2;
3525 hwaddr addr1;
3526 MemTxResult r;
3527 bool release_lock = false;
3529 rcu_read_lock();
3530 mr = address_space_translate(as, addr, &addr1, &l, true);
3531 if (l < 2 || !memory_access_is_direct(mr, true)) {
3532 release_lock |= prepare_mmio_access(mr);
3534 #if defined(TARGET_WORDS_BIGENDIAN)
3535 if (endian == DEVICE_LITTLE_ENDIAN) {
3536 val = bswap16(val);
3538 #else
3539 if (endian == DEVICE_BIG_ENDIAN) {
3540 val = bswap16(val);
3542 #endif
3543 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3544 } else {
3545 /* RAM case */
3546 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3547 switch (endian) {
3548 case DEVICE_LITTLE_ENDIAN:
3549 stw_le_p(ptr, val);
3550 break;
3551 case DEVICE_BIG_ENDIAN:
3552 stw_be_p(ptr, val);
3553 break;
3554 default:
3555 stw_p(ptr, val);
3556 break;
3558 invalidate_and_set_dirty(mr, addr1, 2);
3559 r = MEMTX_OK;
3561 if (result) {
3562 *result = r;
3564 if (release_lock) {
3565 qemu_mutex_unlock_iothread();
3567 rcu_read_unlock();
3570 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3571 MemTxAttrs attrs, MemTxResult *result)
3573 address_space_stw_internal(as, addr, val, attrs, result,
3574 DEVICE_NATIVE_ENDIAN);
3577 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3578 MemTxAttrs attrs, MemTxResult *result)
3580 address_space_stw_internal(as, addr, val, attrs, result,
3581 DEVICE_LITTLE_ENDIAN);
3584 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3585 MemTxAttrs attrs, MemTxResult *result)
3587 address_space_stw_internal(as, addr, val, attrs, result,
3588 DEVICE_BIG_ENDIAN);
3591 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3593 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3596 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3598 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3601 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3603 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3606 /* XXX: optimize */
3607 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3608 MemTxAttrs attrs, MemTxResult *result)
3610 MemTxResult r;
3611 val = tswap64(val);
3612 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3613 if (result) {
3614 *result = r;
3618 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3619 MemTxAttrs attrs, MemTxResult *result)
3621 MemTxResult r;
3622 val = cpu_to_le64(val);
3623 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3624 if (result) {
3625 *result = r;
3628 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3629 MemTxAttrs attrs, MemTxResult *result)
3631 MemTxResult r;
3632 val = cpu_to_be64(val);
3633 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3634 if (result) {
3635 *result = r;
3639 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3641 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3644 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3646 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3649 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3651 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3654 /* virtual memory access for debug (includes writing to ROM) */
3655 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3656 uint8_t *buf, int len, int is_write)
3658 int l;
3659 hwaddr phys_addr;
3660 target_ulong page;
3662 while (len > 0) {
3663 int asidx;
3664 MemTxAttrs attrs;
3666 page = addr & TARGET_PAGE_MASK;
3667 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3668 asidx = cpu_asidx_from_attrs(cpu, attrs);
3669 /* if no physical page mapped, return an error */
3670 if (phys_addr == -1)
3671 return -1;
3672 l = (page + TARGET_PAGE_SIZE) - addr;
3673 if (l > len)
3674 l = len;
3675 phys_addr += (addr & ~TARGET_PAGE_MASK);
3676 if (is_write) {
3677 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3678 phys_addr, buf, l);
3679 } else {
3680 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3681 MEMTXATTRS_UNSPECIFIED,
3682 buf, l, 0);
3684 len -= l;
3685 buf += l;
3686 addr += l;
3688 return 0;
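/*
 * Minimal illustrative sketch: how a debugger-style client (the gdbstub,
 * the monitor "x" command, ...) reads guest *virtual* memory through the
 * helper above.  The wrapper name is made up for the example.
 */
#if 0
static bool example_peek_guest_vaddr(CPUState *cpu, target_ulong vaddr,
                                     uint8_t *buf, int len)
{
    /* Translation is done per page via cpu_get_phys_page_attrs_debug();
     * -1 is returned if any page in the range is unmapped. */
    return cpu_memory_rw_debug(cpu, vaddr, buf, len, 0) == 0;
}
#endif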
3692 * Allows code that needs to deal with migration bitmaps etc. to still be built
3693 * target-independent.
3695 size_t qemu_target_page_bits(void)
3697 return TARGET_PAGE_BITS;
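/*
 * Minimal illustrative sketch: target-independent code (e.g. migration)
 * derives the page size from the accessor above instead of using
 * TARGET_PAGE_SIZE directly.  The helper name is made up.
 */
#if 0
static size_t example_target_page_size(void)
{
    return (size_t)1 << qemu_target_page_bits();
}
#endif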
3700 #endif
3703 * A helper function for the _utterly broken_ virtio device model to find out if
3704 * it's running on a big-endian machine. Don't do this at home, kids!
3706 bool target_words_bigendian(void);
3707 bool target_words_bigendian(void)
3709 #if defined(TARGET_WORDS_BIGENDIAN)
3710 return true;
3711 #else
3712 return false;
3713 #endif
3716 #ifndef CONFIG_USER_ONLY
3717 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3719 MemoryRegion*mr;
3720 hwaddr l = 1;
3721 bool res;
3723 rcu_read_lock();
3724 mr = address_space_translate(&address_space_memory,
3725 phys_addr, &phys_addr, &l, false);
3727 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3728 rcu_read_unlock();
3729 return res;
3732 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3734 RAMBlock *block;
3735 int ret = 0;
3737 rcu_read_lock();
3738 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3739 ret = func(block->idstr, block->host, block->offset,
3740 block->used_length, opaque);
3741 if (ret) {
3742 break;
3745 rcu_read_unlock();
3746 return ret;
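/*
 * Minimal illustrative sketch: a callback matching the call made above
 * (idstr, host pointer, offset, used length, opaque) that just sums up the
 * guest RAM size.  The RAMBlockIterFunc typedef lives in exec/cpu-common.h;
 * the callback name and behaviour here are made up for the example.
 */
#if 0
static int example_count_ram(const char *idstr, void *host,
                             ram_addr_t offset, ram_addr_t length,
                             void *opaque)
{
    *(uint64_t *)opaque += length;
    return 0; /* a non-zero return would stop the iteration early */
}

static uint64_t example_total_ram(void)
{
    uint64_t total = 0;

    qemu_ram_foreach_block(example_count_ram, &total);
    return total;
}
#endif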
3748 #endif