qapi: add release designator to gluster logfile option
[qemu.git] / exec.c
blob4d085812cac63c2356ce39560a67cb86f58070dc
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include "qemu.h"
40 #else /* !CONFIG_USER_ONLY */
41 #include "hw/hw.h"
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57 #include "exec/log.h"
59 #include "migration/vmstate.h"
61 #include "qemu/range.h"
62 #ifndef _WIN32
63 #include "qemu/mmap-alloc.h"
64 #endif
66 //#define DEBUG_SUBPAGE
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock.
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots.
92 #define RAM_RESIZEABLE (1 << 2)
94 #endif
96 #ifdef TARGET_PAGE_BITS_VARY
97 int target_page_bits;
98 bool target_page_bits_decided;
99 #endif
101 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
102 /* current CPU in the current thread. It is only valid inside
103 cpu_exec() */
104 __thread CPUState *current_cpu;
105 /* 0 = Do not count executed instructions.
106 1 = Precise instruction counting.
107 2 = Adaptive rate instruction counting. */
108 int use_icount;
/*
 * Record a preferred target page size, expressed as a number of bits.
 *
 * The target page size is the lowest common denominator for all the CPUs
 * in the system, so it can only be made smaller, never larger, and it can
 * no longer be made smaller once a particular size has been committed to.
 *
 * Returns false if a smaller size is requested after the size has been
 * decided; returns true in every other case.
 */
bool set_preferred_target_page_bits(int bits)
{
#ifdef TARGET_PAGE_BITS_VARY
    bool wants_change;

    assert(bits >= TARGET_PAGE_BITS_MIN);

    wants_change = (target_page_bits == 0) || (bits < target_page_bits);
    if (!wants_change) {
        return true;
    }
    if (target_page_bits_decided) {
        return false;
    }
    target_page_bits = bits;
#endif
    return true;
}
129 #if !defined(CONFIG_USER_ONLY)
/*
 * Commit to the target page size: fall back to TARGET_PAGE_BITS_MIN when
 * no size has been recorded yet, then mark the size as decided.
 * Does nothing unless TARGET_PAGE_BITS_VARY is defined.
 */
static void finalize_target_page_bits(void)
{
#ifdef TARGET_PAGE_BITS_VARY
    if (!target_page_bits) {
        target_page_bits = TARGET_PAGE_BITS_MIN;
    }
    target_page_bits_decided = true;
#endif
}
/* One entry of the multi-level physical page map. */
typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* Number of levels to skip to reach the next level (in units of
     * L2_SIZE); 0 marks a leaf. */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Otherwise (skip): index into phys_map_nodes. */
    uint32_t ptr : 26;
};
150 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
152 /* Size of the L2 (and L3, etc) page tables. */
153 #define ADDR_SPACE_BITS 64
155 #define P_L2_BITS 9
156 #define P_L2_SIZE (1 << P_L2_BITS)
158 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
160 typedef PhysPageEntry Node[P_L2_SIZE];
162 typedef struct PhysPageMap {
163 struct rcu_head rcu;
165 unsigned sections_nb;
166 unsigned sections_nb_alloc;
167 unsigned nodes_nb;
168 unsigned nodes_nb_alloc;
169 Node *nodes;
170 MemoryRegionSection *sections;
171 } PhysPageMap;
173 struct AddressSpaceDispatch {
174 struct rcu_head rcu;
176 MemoryRegionSection *mru_section;
177 /* This is a multi-level map on the physical address space.
178 * The bottom level has pointers to MemoryRegionSections.
180 PhysPageEntry phys_map;
181 PhysPageMap map;
182 AddressSpace *as;
185 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
186 typedef struct subpage_t {
187 MemoryRegion iomem;
188 AddressSpace *as;
189 hwaddr base;
190 uint16_t sub_section[];
191 } subpage_t;
193 #define PHYS_SECTION_UNASSIGNED 0
194 #define PHYS_SECTION_NOTDIRTY 1
195 #define PHYS_SECTION_ROM 2
196 #define PHYS_SECTION_WATCH 3
198 static void io_mem_init(void);
199 static void memory_map_init(void);
200 static void tcg_commit(MemoryListener *listener);
202 static MemoryRegion io_mem_watch;
205 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
206 * @cpu: the CPU whose AddressSpace this is
207 * @as: the AddressSpace itself
208 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
209 * @tcg_as_listener: listener for tracking changes to the AddressSpace
211 struct CPUAddressSpace {
212 CPUState *cpu;
213 AddressSpace *as;
214 struct AddressSpaceDispatch *memory_dispatch;
215 MemoryListener tcg_as_listener;
218 #endif
220 #if !defined(CONFIG_USER_ONLY)
222 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
224 static unsigned alloc_hint = 16;
225 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
226 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
227 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
228 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
229 alloc_hint = map->nodes_nb_alloc;
233 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
235 unsigned i;
236 uint32_t ret;
237 PhysPageEntry e;
238 PhysPageEntry *p;
240 ret = map->nodes_nb++;
241 p = map->nodes[ret];
242 assert(ret != PHYS_MAP_NODE_NIL);
243 assert(ret != map->nodes_nb_alloc);
245 e.skip = leaf ? 0 : 1;
246 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
247 for (i = 0; i < P_L2_SIZE; ++i) {
248 memcpy(&p[i], &e, sizeof(e));
250 return ret;
253 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
254 hwaddr *index, hwaddr *nb, uint16_t leaf,
255 int level)
257 PhysPageEntry *p;
258 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
260 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
261 lp->ptr = phys_map_node_alloc(map, level == 0);
263 p = map->nodes[lp->ptr];
264 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
266 while (*nb && lp < &p[P_L2_SIZE]) {
267 if ((*index & (step - 1)) == 0 && *nb >= step) {
268 lp->skip = 0;
269 lp->ptr = leaf;
270 *index += step;
271 *nb -= step;
272 } else {
273 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
275 ++lp;
279 static void phys_page_set(AddressSpaceDispatch *d,
280 hwaddr index, hwaddr nb,
281 uint16_t leaf)
283 /* Wildly overreserve - it doesn't matter much. */
284 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
286 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
289 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
290 * and update our entry so we can skip it and go directly to the destination.
292 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
294 unsigned valid_ptr = P_L2_SIZE;
295 int valid = 0;
296 PhysPageEntry *p;
297 int i;
299 if (lp->ptr == PHYS_MAP_NODE_NIL) {
300 return;
303 p = nodes[lp->ptr];
304 for (i = 0; i < P_L2_SIZE; i++) {
305 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
306 continue;
309 valid_ptr = i;
310 valid++;
311 if (p[i].skip) {
312 phys_page_compact(&p[i], nodes);
316 /* We can only compress if there's only one child. */
317 if (valid != 1) {
318 return;
321 assert(valid_ptr < P_L2_SIZE);
323 /* Don't compress if it won't fit in the # of bits we have. */
324 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
325 return;
328 lp->ptr = p[valid_ptr].ptr;
329 if (!p[valid_ptr].skip) {
330 /* If our only child is a leaf, make this a leaf. */
331 /* By design, we should have made this node a leaf to begin with so we
332 * should never reach here.
333 * But since it's so simple to handle this, let's do it just in case we
334 * change this rule.
336 lp->skip = 0;
337 } else {
338 lp->skip += p[valid_ptr].skip;
342 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
344 if (d->phys_map.skip) {
345 phys_page_compact(&d->phys_map, d->map.nodes);
349 static inline bool section_covers_addr(const MemoryRegionSection *section,
350 hwaddr addr)
352 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
353 * the section must cover the entire address space.
355 return int128_gethi(section->size) ||
356 range_covers_byte(section->offset_within_address_space,
357 int128_getlo(section->size), addr);
360 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
361 Node *nodes, MemoryRegionSection *sections)
363 PhysPageEntry *p;
364 hwaddr index = addr >> TARGET_PAGE_BITS;
365 int i;
367 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
368 if (lp.ptr == PHYS_MAP_NODE_NIL) {
369 return &sections[PHYS_SECTION_UNASSIGNED];
371 p = nodes[lp.ptr];
372 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
375 if (section_covers_addr(&sections[lp.ptr], addr)) {
376 return &sections[lp.ptr];
377 } else {
378 return &sections[PHYS_SECTION_UNASSIGNED];
382 bool memory_region_is_unassigned(MemoryRegion *mr)
384 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
385 && mr != &io_mem_watch;
388 /* Called from RCU critical section */
389 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
390 hwaddr addr,
391 bool resolve_subpage)
393 MemoryRegionSection *section = atomic_read(&d->mru_section);
394 subpage_t *subpage;
395 bool update;
397 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
398 section_covers_addr(section, addr)) {
399 update = false;
400 } else {
401 section = phys_page_find(d->phys_map, addr, d->map.nodes,
402 d->map.sections);
403 update = true;
405 if (resolve_subpage && section->mr->subpage) {
406 subpage = container_of(section->mr, subpage_t, iomem);
407 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
409 if (update) {
410 atomic_set(&d->mru_section, section);
412 return section;
415 /* Called from RCU critical section */
416 static MemoryRegionSection *
417 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
418 hwaddr *plen, bool resolve_subpage)
420 MemoryRegionSection *section;
421 MemoryRegion *mr;
422 Int128 diff;
424 section = address_space_lookup_region(d, addr, resolve_subpage);
425 /* Compute offset within MemoryRegionSection */
426 addr -= section->offset_within_address_space;
428 /* Compute offset within MemoryRegion */
429 *xlat = addr + section->offset_within_region;
431 mr = section->mr;
433 /* MMIO registers can be expected to perform full-width accesses based only
434 * on their address, without considering adjacent registers that could
435 * decode to completely different MemoryRegions. When such registers
436 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
437 * regions overlap wildly. For this reason we cannot clamp the accesses
438 * here.
440 * If the length is small (as is the case for address_space_ldl/stl),
441 * everything works fine. If the incoming length is large, however,
442 * the caller really has to do the clamping through memory_access_size.
444 if (memory_region_is_ram(mr)) {
445 diff = int128_sub(section->size, int128_make64(addr));
446 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
448 return section;
451 /* Called from RCU critical section */
452 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
453 hwaddr *xlat, hwaddr *plen,
454 bool is_write)
456 IOMMUTLBEntry iotlb;
457 MemoryRegionSection *section;
458 MemoryRegion *mr;
460 for (;;) {
461 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
462 section = address_space_translate_internal(d, addr, &addr, plen, true);
463 mr = section->mr;
465 if (!mr->iommu_ops) {
466 break;
469 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
470 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
471 | (addr & iotlb.addr_mask));
472 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
473 if (!(iotlb.perm & (1 << is_write))) {
474 mr = &io_mem_unassigned;
475 break;
478 as = iotlb.target_as;
481 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
482 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
483 *plen = MIN(page, *plen);
486 *xlat = addr;
487 return mr;
490 /* Called from RCU critical section */
491 MemoryRegionSection *
492 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
493 hwaddr *xlat, hwaddr *plen)
495 MemoryRegionSection *section;
496 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
498 section = address_space_translate_internal(d, addr, xlat, plen, false);
500 assert(!section->mr->iommu_ops);
501 return section;
503 #endif
505 #if !defined(CONFIG_USER_ONLY)
507 static int cpu_common_post_load(void *opaque, int version_id)
509 CPUState *cpu = opaque;
511 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
512 version_id is increased. */
513 cpu->interrupt_request &= ~0x01;
514 tlb_flush(cpu, 1);
516 return 0;
519 static int cpu_common_pre_load(void *opaque)
521 CPUState *cpu = opaque;
523 cpu->exception_index = -1;
525 return 0;
528 static bool cpu_common_exception_index_needed(void *opaque)
530 CPUState *cpu = opaque;
532 return tcg_enabled() && cpu->exception_index != -1;
535 static const VMStateDescription vmstate_cpu_common_exception_index = {
536 .name = "cpu_common/exception_index",
537 .version_id = 1,
538 .minimum_version_id = 1,
539 .needed = cpu_common_exception_index_needed,
540 .fields = (VMStateField[]) {
541 VMSTATE_INT32(exception_index, CPUState),
542 VMSTATE_END_OF_LIST()
546 static bool cpu_common_crash_occurred_needed(void *opaque)
548 CPUState *cpu = opaque;
550 return cpu->crash_occurred;
553 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
554 .name = "cpu_common/crash_occurred",
555 .version_id = 1,
556 .minimum_version_id = 1,
557 .needed = cpu_common_crash_occurred_needed,
558 .fields = (VMStateField[]) {
559 VMSTATE_BOOL(crash_occurred, CPUState),
560 VMSTATE_END_OF_LIST()
564 const VMStateDescription vmstate_cpu_common = {
565 .name = "cpu_common",
566 .version_id = 1,
567 .minimum_version_id = 1,
568 .pre_load = cpu_common_pre_load,
569 .post_load = cpu_common_post_load,
570 .fields = (VMStateField[]) {
571 VMSTATE_UINT32(halted, CPUState),
572 VMSTATE_UINT32(interrupt_request, CPUState),
573 VMSTATE_END_OF_LIST()
575 .subsections = (const VMStateDescription*[]) {
576 &vmstate_cpu_common_exception_index,
577 &vmstate_cpu_common_crash_occurred,
578 NULL
582 #endif
584 CPUState *qemu_get_cpu(int index)
586 CPUState *cpu;
588 CPU_FOREACH(cpu) {
589 if (cpu->cpu_index == index) {
590 return cpu;
594 return NULL;
597 #if !defined(CONFIG_USER_ONLY)
598 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
600 CPUAddressSpace *newas;
602 /* Target code should have set num_ases before calling us */
603 assert(asidx < cpu->num_ases);
605 if (asidx == 0) {
606 /* address space 0 gets the convenience alias */
607 cpu->as = as;
610 /* KVM cannot currently support multiple address spaces. */
611 assert(asidx == 0 || !kvm_enabled());
613 if (!cpu->cpu_ases) {
614 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
617 newas = &cpu->cpu_ases[asidx];
618 newas->cpu = cpu;
619 newas->as = as;
620 if (tcg_enabled()) {
621 newas->tcg_as_listener.commit = tcg_commit;
622 memory_listener_register(&newas->tcg_as_listener, as);
626 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
628 /* Return the AddressSpace corresponding to the specified index */
629 return cpu->cpu_ases[asidx].as;
631 #endif
633 void cpu_exec_unrealizefn(CPUState *cpu)
635 CPUClass *cc = CPU_GET_CLASS(cpu);
637 cpu_list_remove(cpu);
639 if (cc->vmsd != NULL) {
640 vmstate_unregister(NULL, cc->vmsd, cpu);
642 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
643 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
647 void cpu_exec_initfn(CPUState *cpu)
649 cpu->as = NULL;
650 cpu->num_ases = 0;
652 #ifndef CONFIG_USER_ONLY
653 cpu->thread_id = qemu_get_thread_id();
655 /* This is a softmmu CPU object, so create a property for it
656 * so users can wire up its memory. (This can't go in qom/cpu.c
657 * because that file is compiled only once for both user-mode
658 * and system builds.) The default if no link is set up is to use
659 * the system address space.
661 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
662 (Object **)&cpu->memory,
663 qdev_prop_allow_set_link_before_realize,
664 OBJ_PROP_LINK_UNREF_ON_RELEASE,
665 &error_abort);
666 cpu->memory = system_memory;
667 object_ref(OBJECT(cpu->memory));
668 #endif
671 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
673 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
675 cpu_list_add(cpu);
677 #ifndef CONFIG_USER_ONLY
678 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
679 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
681 if (cc->vmsd != NULL) {
682 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
684 #endif
687 #if defined(CONFIG_USER_ONLY)
688 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
690 mmap_lock();
691 tb_lock();
692 tb_invalidate_phys_page_range(pc, pc + 1, 0);
693 tb_unlock();
694 mmap_unlock();
696 #else
697 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
699 MemTxAttrs attrs;
700 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
701 int asidx = cpu_asidx_from_attrs(cpu, attrs);
702 if (phys != -1) {
703 /* Locks grabbed by tb_invalidate_phys_addr */
704 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
705 phys | (pc & ~TARGET_PAGE_MASK));
708 #endif
710 #if defined(CONFIG_USER_ONLY)
711 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
716 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
717 int flags)
719 return -ENOSYS;
722 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
726 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
727 int flags, CPUWatchpoint **watchpoint)
729 return -ENOSYS;
731 #else
732 /* Add a watchpoint. */
733 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
734 int flags, CPUWatchpoint **watchpoint)
736 CPUWatchpoint *wp;
738 /* forbid ranges which are empty or run off the end of the address space */
739 if (len == 0 || (addr + len - 1) < addr) {
740 error_report("tried to set invalid watchpoint at %"
741 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
742 return -EINVAL;
744 wp = g_malloc(sizeof(*wp));
746 wp->vaddr = addr;
747 wp->len = len;
748 wp->flags = flags;
750 /* keep all GDB-injected watchpoints in front */
751 if (flags & BP_GDB) {
752 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
753 } else {
754 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
757 tlb_flush_page(cpu, addr);
759 if (watchpoint)
760 *watchpoint = wp;
761 return 0;
764 /* Remove a specific watchpoint. */
765 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
766 int flags)
768 CPUWatchpoint *wp;
770 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
771 if (addr == wp->vaddr && len == wp->len
772 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
773 cpu_watchpoint_remove_by_ref(cpu, wp);
774 return 0;
777 return -ENOENT;
780 /* Remove a specific watchpoint by reference. */
781 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
783 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
785 tlb_flush_page(cpu, watchpoint->vaddr);
787 g_free(watchpoint);
790 /* Remove all matching watchpoints. */
791 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
793 CPUWatchpoint *wp, *next;
795 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
796 if (wp->flags & mask) {
797 cpu_watchpoint_remove_by_ref(cpu, wp);
802 /* Return true if this watchpoint address matches the specified
803 * access (ie the address range covered by the watchpoint overlaps
804 * partially or completely with the address range covered by the
805 * access).
807 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
808 vaddr addr,
809 vaddr len)
811 /* We know the lengths are non-zero, but a little caution is
812 * required to avoid errors in the case where the range ends
813 * exactly at the top of the address space and so addr + len
814 * wraps round to zero.
816 vaddr wpend = wp->vaddr + wp->len - 1;
817 vaddr addrend = addr + len - 1;
819 return !(addr > wpend || wp->vaddr > addrend);
822 #endif
824 /* Add a breakpoint. */
825 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
826 CPUBreakpoint **breakpoint)
828 CPUBreakpoint *bp;
830 bp = g_malloc(sizeof(*bp));
832 bp->pc = pc;
833 bp->flags = flags;
835 /* keep all GDB-injected breakpoints in front */
836 if (flags & BP_GDB) {
837 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
838 } else {
839 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
842 breakpoint_invalidate(cpu, pc);
844 if (breakpoint) {
845 *breakpoint = bp;
847 return 0;
850 /* Remove a specific breakpoint. */
851 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
853 CPUBreakpoint *bp;
855 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
856 if (bp->pc == pc && bp->flags == flags) {
857 cpu_breakpoint_remove_by_ref(cpu, bp);
858 return 0;
861 return -ENOENT;
864 /* Remove a specific breakpoint by reference. */
865 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
867 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
869 breakpoint_invalidate(cpu, breakpoint->pc);
871 g_free(breakpoint);
874 /* Remove all matching breakpoints. */
875 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
877 CPUBreakpoint *bp, *next;
879 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
880 if (bp->flags & mask) {
881 cpu_breakpoint_remove_by_ref(cpu, bp);
886 /* enable or disable single step mode. EXCP_DEBUG is returned by the
887 CPU loop after each instruction */
888 void cpu_single_step(CPUState *cpu, int enabled)
890 if (cpu->singlestep_enabled != enabled) {
891 cpu->singlestep_enabled = enabled;
892 if (kvm_enabled()) {
893 kvm_update_guest_debug(cpu, 0);
894 } else {
895 /* must flush all the translated code to avoid inconsistencies */
896 /* XXX: only flush what is necessary */
897 tb_flush(cpu);
902 void cpu_abort(CPUState *cpu, const char *fmt, ...)
904 va_list ap;
905 va_list ap2;
907 va_start(ap, fmt);
908 va_copy(ap2, ap);
909 fprintf(stderr, "qemu: fatal: ");
910 vfprintf(stderr, fmt, ap);
911 fprintf(stderr, "\n");
912 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
913 if (qemu_log_separate()) {
914 qemu_log("qemu: fatal: ");
915 qemu_log_vprintf(fmt, ap2);
916 qemu_log("\n");
917 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
918 qemu_log_flush();
919 qemu_log_close();
921 va_end(ap2);
922 va_end(ap);
923 replay_finish();
924 #if defined(CONFIG_USER_ONLY)
926 struct sigaction act;
927 sigfillset(&act.sa_mask);
928 act.sa_handler = SIG_DFL;
929 sigaction(SIGABRT, &act, NULL);
931 #endif
932 abort();
935 #if !defined(CONFIG_USER_ONLY)
936 /* Called from RCU critical section */
937 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
939 RAMBlock *block;
941 block = atomic_rcu_read(&ram_list.mru_block);
942 if (block && addr - block->offset < block->max_length) {
943 return block;
945 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
946 if (addr - block->offset < block->max_length) {
947 goto found;
951 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
952 abort();
954 found:
955 /* It is safe to write mru_block outside the iothread lock. This
956 * is what happens:
958 * mru_block = xxx
959 * rcu_read_unlock()
960 * xxx removed from list
961 * rcu_read_lock()
962 * read mru_block
963 * mru_block = NULL;
964 * call_rcu(reclaim_ramblock, xxx);
965 * rcu_read_unlock()
967 * atomic_rcu_set is not needed here. The block was already published
968 * when it was placed into the list. Here we're just making an extra
969 * copy of the pointer.
971 ram_list.mru_block = block;
972 return block;
975 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
977 CPUState *cpu;
978 ram_addr_t start1;
979 RAMBlock *block;
980 ram_addr_t end;
982 end = TARGET_PAGE_ALIGN(start + length);
983 start &= TARGET_PAGE_MASK;
985 rcu_read_lock();
986 block = qemu_get_ram_block(start);
987 assert(block == qemu_get_ram_block(end - 1));
988 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
989 CPU_FOREACH(cpu) {
990 tlb_reset_dirty(cpu, start1, length);
992 rcu_read_unlock();
995 /* Note: start and end must be within the same ram block. */
996 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
997 ram_addr_t length,
998 unsigned client)
1000 DirtyMemoryBlocks *blocks;
1001 unsigned long end, page;
1002 bool dirty = false;
1004 if (length == 0) {
1005 return false;
1008 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1009 page = start >> TARGET_PAGE_BITS;
1011 rcu_read_lock();
1013 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1015 while (page < end) {
1016 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1017 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1018 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1020 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1021 offset, num);
1022 page += num;
1025 rcu_read_unlock();
1027 if (dirty && tcg_enabled()) {
1028 tlb_reset_dirty_range_all(start, length);
1031 return dirty;
1034 /* Called from RCU critical section */
1035 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1036 MemoryRegionSection *section,
1037 target_ulong vaddr,
1038 hwaddr paddr, hwaddr xlat,
1039 int prot,
1040 target_ulong *address)
1042 hwaddr iotlb;
1043 CPUWatchpoint *wp;
1045 if (memory_region_is_ram(section->mr)) {
1046 /* Normal RAM. */
1047 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1048 if (!section->readonly) {
1049 iotlb |= PHYS_SECTION_NOTDIRTY;
1050 } else {
1051 iotlb |= PHYS_SECTION_ROM;
1053 } else {
1054 AddressSpaceDispatch *d;
1056 d = atomic_rcu_read(&section->address_space->dispatch);
1057 iotlb = section - d->map.sections;
1058 iotlb += xlat;
1061 /* Make accesses to pages with watchpoints go via the
1062 watchpoint trap routines. */
1063 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1064 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1065 /* Avoid trapping reads of pages with a write breakpoint. */
1066 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1067 iotlb = PHYS_SECTION_WATCH + paddr;
1068 *address |= TLB_MMIO;
1069 break;
1074 return iotlb;
1076 #endif /* !defined(CONFIG_USER_ONLY) */
1078 #if !defined(CONFIG_USER_ONLY)
1080 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1081 uint16_t section);
1082 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1084 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1085 qemu_anon_ram_alloc;
1088 * Set a custom physical guest memory alloator.
1089 * Accelerators with unusual needs may need this. Hopefully, we can
1090 * get rid of it eventually.
1092 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1094 phys_mem_alloc = alloc;
1097 static uint16_t phys_section_add(PhysPageMap *map,
1098 MemoryRegionSection *section)
1100 /* The physical section number is ORed with a page-aligned
1101 * pointer to produce the iotlb entries. Thus it should
1102 * never overflow into the page-aligned value.
1104 assert(map->sections_nb < TARGET_PAGE_SIZE);
1106 if (map->sections_nb == map->sections_nb_alloc) {
1107 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1108 map->sections = g_renew(MemoryRegionSection, map->sections,
1109 map->sections_nb_alloc);
1111 map->sections[map->sections_nb] = *section;
1112 memory_region_ref(section->mr);
1113 return map->sections_nb++;
1116 static void phys_section_destroy(MemoryRegion *mr)
1118 bool have_sub_page = mr->subpage;
1120 memory_region_unref(mr);
1122 if (have_sub_page) {
1123 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1124 object_unref(OBJECT(&subpage->iomem));
1125 g_free(subpage);
1129 static void phys_sections_free(PhysPageMap *map)
1131 while (map->sections_nb > 0) {
1132 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1133 phys_section_destroy(section->mr);
1135 g_free(map->sections);
1136 g_free(map->nodes);
1139 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1141 subpage_t *subpage;
1142 hwaddr base = section->offset_within_address_space
1143 & TARGET_PAGE_MASK;
1144 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1145 d->map.nodes, d->map.sections);
1146 MemoryRegionSection subsection = {
1147 .offset_within_address_space = base,
1148 .size = int128_make64(TARGET_PAGE_SIZE),
1150 hwaddr start, end;
1152 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1154 if (!(existing->mr->subpage)) {
1155 subpage = subpage_init(d->as, base);
1156 subsection.address_space = d->as;
1157 subsection.mr = &subpage->iomem;
1158 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1159 phys_section_add(&d->map, &subsection));
1160 } else {
1161 subpage = container_of(existing->mr, subpage_t, iomem);
1163 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1164 end = start + int128_get64(section->size) - 1;
1165 subpage_register(subpage, start, end,
1166 phys_section_add(&d->map, section));
1170 static void register_multipage(AddressSpaceDispatch *d,
1171 MemoryRegionSection *section)
1173 hwaddr start_addr = section->offset_within_address_space;
1174 uint16_t section_index = phys_section_add(&d->map, section);
1175 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1176 TARGET_PAGE_BITS));
1178 assert(num_pages);
1179 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1182 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1184 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1185 AddressSpaceDispatch *d = as->next_dispatch;
1186 MemoryRegionSection now = *section, remain = *section;
1187 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1189 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1190 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1191 - now.offset_within_address_space;
1193 now.size = int128_min(int128_make64(left), now.size);
1194 register_subpage(d, &now);
1195 } else {
1196 now.size = int128_zero();
1198 while (int128_ne(remain.size, now.size)) {
1199 remain.size = int128_sub(remain.size, now.size);
1200 remain.offset_within_address_space += int128_get64(now.size);
1201 remain.offset_within_region += int128_get64(now.size);
1202 now = remain;
1203 if (int128_lt(remain.size, page_size)) {
1204 register_subpage(d, &now);
1205 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1206 now.size = page_size;
1207 register_subpage(d, &now);
1208 } else {
1209 now.size = int128_and(now.size, int128_neg(page_size));
1210 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1221 void qemu_mutex_lock_ramlist(void)
1223 qemu_mutex_lock(&ram_list.mutex);
1226 void qemu_mutex_unlock_ramlist(void)
1228 qemu_mutex_unlock(&ram_list.mutex);
1231 #ifdef __linux__
1232 static void *file_ram_alloc(RAMBlock *block,
1233 ram_addr_t memory,
1234 const char *path,
1235 Error **errp)
1237 bool unlink_on_error = false;
1238 char *filename;
1239 char *sanitized_name;
1240 char *c;
1241 void *area = MAP_FAILED;
1242 int fd = -1;
1244 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1245 error_setg(errp,
1246 "host lacks kvm mmu notifiers, -mem-path unsupported");
1247 return NULL;
1250 for (;;) {
1251 fd = open(path, O_RDWR);
1252 if (fd >= 0) {
1253 /* @path names an existing file, use it */
1254 break;
1256 if (errno == ENOENT) {
1257 /* @path names a file that doesn't exist, create it */
1258 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1259 if (fd >= 0) {
1260 unlink_on_error = true;
1261 break;
1263 } else if (errno == EISDIR) {
1264 /* @path names a directory, create a file there */
1265 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1266 sanitized_name = g_strdup(memory_region_name(block->mr));
1267 for (c = sanitized_name; *c != '\0'; c++) {
1268 if (*c == '/') {
1269 *c = '_';
1273 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1274 sanitized_name);
1275 g_free(sanitized_name);
1277 fd = mkstemp(filename);
1278 if (fd >= 0) {
1279 unlink(filename);
1280 g_free(filename);
1281 break;
1283 g_free(filename);
1285 if (errno != EEXIST && errno != EINTR) {
1286 error_setg_errno(errp, errno,
1287 "can't open backing store %s for guest RAM",
1288 path);
1289 goto error;
1292 * Try again on EINTR and EEXIST. The latter happens when
1293 * something else creates the file between our two open().
1297 block->page_size = qemu_fd_getpagesize(fd);
1298 block->mr->align = block->page_size;
1299 #if defined(__s390x__)
1300 if (kvm_enabled()) {
1301 block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1303 #endif
1305 if (memory < block->page_size) {
1306 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1307 "or larger than page size 0x%zx",
1308 memory, block->page_size);
1309 goto error;
1312 memory = ROUND_UP(memory, block->page_size);
1315 * ftruncate is not supported by hugetlbfs in older
1316 * hosts, so don't bother bailing out on errors.
1317 * If anything goes wrong with it under other filesystems,
1318 * mmap will fail.
1320 if (ftruncate(fd, memory)) {
1321 perror("ftruncate");
1324 area = qemu_ram_mmap(fd, memory, block->mr->align,
1325 block->flags & RAM_SHARED);
1326 if (area == MAP_FAILED) {
1327 error_setg_errno(errp, errno,
1328 "unable to map backing store for guest RAM");
1329 goto error;
1332 if (mem_prealloc) {
1333 os_mem_prealloc(fd, area, memory, errp);
1334 if (errp && *errp) {
1335 goto error;
1339 block->fd = fd;
1340 return area;
1342 error:
1343 if (area != MAP_FAILED) {
1344 qemu_ram_munmap(area, memory);
1346 if (unlink_on_error) {
1347 unlink(path);
1349 if (fd != -1) {
1350 close(fd);
1352 return NULL;
1354 #endif
1356 /* Called with the ramlist lock held. */
1357 static ram_addr_t find_ram_offset(ram_addr_t size)
1359 RAMBlock *block, *next_block;
1360 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1362 assert(size != 0); /* it would hand out same offset multiple times */
1364 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1365 return 0;
1368 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1369 ram_addr_t end, next = RAM_ADDR_MAX;
1371 end = block->offset + block->max_length;
1373 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1374 if (next_block->offset >= end) {
1375 next = MIN(next, next_block->offset);
1378 if (next - end >= size && next - end < mingap) {
1379 offset = end;
1380 mingap = next - end;
1384 if (offset == RAM_ADDR_MAX) {
1385 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1386 (uint64_t)size);
1387 abort();
1390 return offset;
1393 ram_addr_t last_ram_offset(void)
1395 RAMBlock *block;
1396 ram_addr_t last = 0;
1398 rcu_read_lock();
1399 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1400 last = MAX(last, block->offset + block->max_length);
1402 rcu_read_unlock();
1403 return last;
1406 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1408 int ret;
1410 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1411 if (!machine_dump_guest_core(current_machine)) {
1412 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1413 if (ret) {
1414 perror("qemu_madvise");
1415 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1416 "but dump_guest_core=off specified\n");
1421 const char *qemu_ram_get_idstr(RAMBlock *rb)
1423 return rb->idstr;
1426 /* Called with iothread lock held. */
1427 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1429 RAMBlock *block;
1431 assert(new_block);
1432 assert(!new_block->idstr[0]);
1434 if (dev) {
1435 char *id = qdev_get_dev_path(dev);
1436 if (id) {
1437 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1438 g_free(id);
1441 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1443 rcu_read_lock();
1444 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1445 if (block != new_block &&
1446 !strcmp(block->idstr, new_block->idstr)) {
1447 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1448 new_block->idstr);
1449 abort();
1452 rcu_read_unlock();
1455 /* Called with iothread lock held. */
1456 void qemu_ram_unset_idstr(RAMBlock *block)
1458 /* FIXME: arch_init.c assumes that this is not called throughout
1459 * migration. Ignore the problem since hot-unplug during migration
1460 * does not work anyway.
1462 if (block) {
1463 memset(block->idstr, 0, sizeof(block->idstr));
1467 size_t qemu_ram_pagesize(RAMBlock *rb)
1469 return rb->page_size;
1472 static int memory_try_enable_merging(void *addr, size_t len)
1474 if (!machine_mem_merge(current_machine)) {
1475 /* disabled by the user */
1476 return 0;
1479 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1482 /* Only legal before guest might have detected the memory size: e.g. on
1483 * incoming migration, or right after reset.
1485 * As memory core doesn't know how is memory accessed, it is up to
1486 * resize callback to update device state and/or add assertions to detect
1487 * misuse, if necessary.
1489 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1491 assert(block);
1493 newsize = HOST_PAGE_ALIGN(newsize);
1495 if (block->used_length == newsize) {
1496 return 0;
1499 if (!(block->flags & RAM_RESIZEABLE)) {
1500 error_setg_errno(errp, EINVAL,
1501 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1502 " in != 0x" RAM_ADDR_FMT, block->idstr,
1503 newsize, block->used_length);
1504 return -EINVAL;
1507 if (block->max_length < newsize) {
1508 error_setg_errno(errp, EINVAL,
1509 "Length too large: %s: 0x" RAM_ADDR_FMT
1510 " > 0x" RAM_ADDR_FMT, block->idstr,
1511 newsize, block->max_length);
1512 return -EINVAL;
1515 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1516 block->used_length = newsize;
1517 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1518 DIRTY_CLIENTS_ALL);
1519 memory_region_set_size(block->mr, newsize);
1520 if (block->resized) {
1521 block->resized(block->idstr, newsize, block->host);
1523 return 0;
1526 /* Called with ram_list.mutex held */
1527 static void dirty_memory_extend(ram_addr_t old_ram_size,
1528 ram_addr_t new_ram_size)
1530 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1531 DIRTY_MEMORY_BLOCK_SIZE);
1532 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1533 DIRTY_MEMORY_BLOCK_SIZE);
1534 int i;
1536 /* Only need to extend if block count increased */
1537 if (new_num_blocks <= old_num_blocks) {
1538 return;
1541 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1542 DirtyMemoryBlocks *old_blocks;
1543 DirtyMemoryBlocks *new_blocks;
1544 int j;
1546 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1547 new_blocks = g_malloc(sizeof(*new_blocks) +
1548 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1550 if (old_num_blocks) {
1551 memcpy(new_blocks->blocks, old_blocks->blocks,
1552 old_num_blocks * sizeof(old_blocks->blocks[0]));
1555 for (j = old_num_blocks; j < new_num_blocks; j++) {
1556 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1559 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1561 if (old_blocks) {
1562 g_free_rcu(old_blocks, rcu);
1567 static void ram_block_add(RAMBlock *new_block, Error **errp)
1569 RAMBlock *block;
1570 RAMBlock *last_block = NULL;
1571 ram_addr_t old_ram_size, new_ram_size;
1572 Error *err = NULL;
1574 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1576 qemu_mutex_lock_ramlist();
1577 new_block->offset = find_ram_offset(new_block->max_length);
1579 if (!new_block->host) {
1580 if (xen_enabled()) {
1581 xen_ram_alloc(new_block->offset, new_block->max_length,
1582 new_block->mr, &err);
1583 if (err) {
1584 error_propagate(errp, err);
1585 qemu_mutex_unlock_ramlist();
1586 return;
1588 } else {
1589 new_block->host = phys_mem_alloc(new_block->max_length,
1590 &new_block->mr->align);
1591 if (!new_block->host) {
1592 error_setg_errno(errp, errno,
1593 "cannot set up guest memory '%s'",
1594 memory_region_name(new_block->mr));
1595 qemu_mutex_unlock_ramlist();
1596 return;
1598 memory_try_enable_merging(new_block->host, new_block->max_length);
1602 new_ram_size = MAX(old_ram_size,
1603 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1604 if (new_ram_size > old_ram_size) {
1605 migration_bitmap_extend(old_ram_size, new_ram_size);
1606 dirty_memory_extend(old_ram_size, new_ram_size);
1608 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1609 * QLIST (which has an RCU-friendly variant) does not have insertion at
1610 * tail, so save the last element in last_block.
1612 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1613 last_block = block;
1614 if (block->max_length < new_block->max_length) {
1615 break;
1618 if (block) {
1619 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1620 } else if (last_block) {
1621 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1622 } else { /* list is empty */
1623 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1625 ram_list.mru_block = NULL;
1627 /* Write list before version */
1628 smp_wmb();
1629 ram_list.version++;
1630 qemu_mutex_unlock_ramlist();
1632 cpu_physical_memory_set_dirty_range(new_block->offset,
1633 new_block->used_length,
1634 DIRTY_CLIENTS_ALL);
1636 if (new_block->host) {
1637 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1638 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1639 /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1640 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1644 #ifdef __linux__
1645 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1646 bool share, const char *mem_path,
1647 Error **errp)
1649 RAMBlock *new_block;
1650 Error *local_err = NULL;
1652 if (xen_enabled()) {
1653 error_setg(errp, "-mem-path not supported with Xen");
1654 return NULL;
1657 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1659 * file_ram_alloc() needs to allocate just like
1660 * phys_mem_alloc, but we haven't bothered to provide
1661 * a hook there.
1663 error_setg(errp,
1664 "-mem-path not supported with this accelerator");
1665 return NULL;
1668 size = HOST_PAGE_ALIGN(size);
1669 new_block = g_malloc0(sizeof(*new_block));
1670 new_block->mr = mr;
1671 new_block->used_length = size;
1672 new_block->max_length = size;
1673 new_block->flags = share ? RAM_SHARED : 0;
1674 new_block->host = file_ram_alloc(new_block, size,
1675 mem_path, errp);
1676 if (!new_block->host) {
1677 g_free(new_block);
1678 return NULL;
1681 ram_block_add(new_block, &local_err);
1682 if (local_err) {
1683 g_free(new_block);
1684 error_propagate(errp, local_err);
1685 return NULL;
1687 return new_block;
1689 #endif
1691 static
1692 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1693 void (*resized)(const char*,
1694 uint64_t length,
1695 void *host),
1696 void *host, bool resizeable,
1697 MemoryRegion *mr, Error **errp)
1699 RAMBlock *new_block;
1700 Error *local_err = NULL;
1702 size = HOST_PAGE_ALIGN(size);
1703 max_size = HOST_PAGE_ALIGN(max_size);
1704 new_block = g_malloc0(sizeof(*new_block));
1705 new_block->mr = mr;
1706 new_block->resized = resized;
1707 new_block->used_length = size;
1708 new_block->max_length = max_size;
1709 assert(max_size >= size);
1710 new_block->fd = -1;
1711 new_block->page_size = getpagesize();
1712 new_block->host = host;
1713 if (host) {
1714 new_block->flags |= RAM_PREALLOC;
1716 if (resizeable) {
1717 new_block->flags |= RAM_RESIZEABLE;
1719 ram_block_add(new_block, &local_err);
1720 if (local_err) {
1721 g_free(new_block);
1722 error_propagate(errp, local_err);
1723 return NULL;
1725 return new_block;
1728 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1729 MemoryRegion *mr, Error **errp)
1731 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1734 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1736 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1739 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1740 void (*resized)(const char*,
1741 uint64_t length,
1742 void *host),
1743 MemoryRegion *mr, Error **errp)
1745 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1748 static void reclaim_ramblock(RAMBlock *block)
1750 if (block->flags & RAM_PREALLOC) {
1752 } else if (xen_enabled()) {
1753 xen_invalidate_map_cache_entry(block->host);
1754 #ifndef _WIN32
1755 } else if (block->fd >= 0) {
1756 qemu_ram_munmap(block->host, block->max_length);
1757 close(block->fd);
1758 #endif
1759 } else {
1760 qemu_anon_ram_free(block->host, block->max_length);
1762 g_free(block);
1765 void qemu_ram_free(RAMBlock *block)
1767 if (!block) {
1768 return;
1771 qemu_mutex_lock_ramlist();
1772 QLIST_REMOVE_RCU(block, next);
1773 ram_list.mru_block = NULL;
1774 /* Write list before version */
1775 smp_wmb();
1776 ram_list.version++;
1777 call_rcu(block, reclaim_ramblock, rcu);
1778 qemu_mutex_unlock_ramlist();
1781 #ifndef _WIN32
1782 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1784 RAMBlock *block;
1785 ram_addr_t offset;
1786 int flags;
1787 void *area, *vaddr;
1789 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1790 offset = addr - block->offset;
1791 if (offset < block->max_length) {
1792 vaddr = ramblock_ptr(block, offset);
1793 if (block->flags & RAM_PREALLOC) {
1795 } else if (xen_enabled()) {
1796 abort();
1797 } else {
1798 flags = MAP_FIXED;
1799 if (block->fd >= 0) {
1800 flags |= (block->flags & RAM_SHARED ?
1801 MAP_SHARED : MAP_PRIVATE);
1802 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1803 flags, block->fd, offset);
1804 } else {
1806 * Remap needs to match alloc. Accelerators that
1807 * set phys_mem_alloc never remap. If they did,
1808 * we'd need a remap hook here.
1810 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1812 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1813 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1814 flags, -1, 0);
1816 if (area != vaddr) {
1817 fprintf(stderr, "Could not remap addr: "
1818 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1819 length, addr);
1820 exit(1);
1822 memory_try_enable_merging(vaddr, length);
1823 qemu_ram_setup_dump(vaddr, length);
1828 #endif /* !_WIN32 */
1830 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1831 * This should not be used for general purpose DMA. Use address_space_map
1832 * or address_space_rw instead. For local memory (e.g. video ram) that the
1833 * device owns, use memory_region_get_ram_ptr.
1835 * Called within RCU critical section.
1837 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1839 RAMBlock *block = ram_block;
1841 if (block == NULL) {
1842 block = qemu_get_ram_block(addr);
1843 addr -= block->offset;
1846 if (xen_enabled() && block->host == NULL) {
1847 /* We need to check if the requested address is in the RAM
1848 * because we don't want to map the entire memory in QEMU.
1849 * In that case just map until the end of the page.
1851 if (block->offset == 0) {
1852 return xen_map_cache(addr, 0, 0);
1855 block->host = xen_map_cache(block->offset, block->max_length, 1);
1857 return ramblock_ptr(block, addr);
1860 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1861 * but takes a size argument.
1863 * Called within RCU critical section.
1865 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1866 hwaddr *size)
1868 RAMBlock *block = ram_block;
1869 if (*size == 0) {
1870 return NULL;
1873 if (block == NULL) {
1874 block = qemu_get_ram_block(addr);
1875 addr -= block->offset;
1877 *size = MIN(*size, block->max_length - addr);
1879 if (xen_enabled() && block->host == NULL) {
1880 /* We need to check if the requested address is in the RAM
1881 * because we don't want to map the entire memory in QEMU.
1882 * In that case just map the requested area.
1884 if (block->offset == 0) {
1885 return xen_map_cache(addr, *size, 1);
1888 block->host = xen_map_cache(block->offset, block->max_length, 1);
1891 return ramblock_ptr(block, addr);
1895 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1896 * in that RAMBlock.
1898 * ptr: Host pointer to look up
1899 * round_offset: If true round the result offset down to a page boundary
1900 * *ram_addr: set to result ram_addr
1901 * *offset: set to result offset within the RAMBlock
1903 * Returns: RAMBlock (or NULL if not found)
1905 * By the time this function returns, the returned pointer is not protected
1906 * by RCU anymore. If the caller is not within an RCU critical section and
1907 * does not hold the iothread lock, it must have other means of protecting the
1908 * pointer, such as a reference to the region that includes the incoming
1909 * ram_addr_t.
1911 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1912 ram_addr_t *offset)
1914 RAMBlock *block;
1915 uint8_t *host = ptr;
1917 if (xen_enabled()) {
1918 ram_addr_t ram_addr;
1919 rcu_read_lock();
1920 ram_addr = xen_ram_addr_from_mapcache(ptr);
1921 block = qemu_get_ram_block(ram_addr);
1922 if (block) {
1923 *offset = ram_addr - block->offset;
1925 rcu_read_unlock();
1926 return block;
1929 rcu_read_lock();
1930 block = atomic_rcu_read(&ram_list.mru_block);
1931 if (block && block->host && host - block->host < block->max_length) {
1932 goto found;
1935 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1936 /* This case append when the block is not mapped. */
1937 if (block->host == NULL) {
1938 continue;
1940 if (host - block->host < block->max_length) {
1941 goto found;
1945 rcu_read_unlock();
1946 return NULL;
1948 found:
1949 *offset = (host - block->host);
1950 if (round_offset) {
1951 *offset &= TARGET_PAGE_MASK;
1953 rcu_read_unlock();
1954 return block;
1958 * Finds the named RAMBlock
1960 * name: The name of RAMBlock to find
1962 * Returns: RAMBlock (or NULL if not found)
1964 RAMBlock *qemu_ram_block_by_name(const char *name)
1966 RAMBlock *block;
1968 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1969 if (!strcmp(name, block->idstr)) {
1970 return block;
1974 return NULL;
1977 /* Some of the softmmu routines need to translate from a host pointer
1978 (typically a TLB entry) back to a ram offset. */
1979 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1981 RAMBlock *block;
1982 ram_addr_t offset;
1984 block = qemu_ram_block_from_host(ptr, false, &offset);
1985 if (!block) {
1986 return RAM_ADDR_INVALID;
1989 return block->offset + offset;
1992 /* Called within RCU critical section. */
1993 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1994 uint64_t val, unsigned size)
1996 bool locked = false;
1998 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1999 locked = true;
2000 tb_lock();
2001 tb_invalidate_phys_page_fast(ram_addr, size);
2003 switch (size) {
2004 case 1:
2005 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2006 break;
2007 case 2:
2008 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2009 break;
2010 case 4:
2011 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2012 break;
2013 default:
2014 abort();
2017 if (locked) {
2018 tb_unlock();
2021 /* Set both VGA and migration bits for simplicity and to remove
2022 * the notdirty callback faster.
2024 cpu_physical_memory_set_dirty_range(ram_addr, size,
2025 DIRTY_CLIENTS_NOCODE);
2026 /* we remove the notdirty callback only if the code has been
2027 flushed */
2028 if (!cpu_physical_memory_is_clean(ram_addr)) {
2029 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2033 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2034 unsigned size, bool is_write)
2036 return is_write;
2039 static const MemoryRegionOps notdirty_mem_ops = {
2040 .write = notdirty_mem_write,
2041 .valid.accepts = notdirty_mem_accepts,
2042 .endianness = DEVICE_NATIVE_ENDIAN,
2045 /* Generate a debug exception if a watchpoint has been hit. */
2046 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2048 CPUState *cpu = current_cpu;
2049 CPUClass *cc = CPU_GET_CLASS(cpu);
2050 CPUArchState *env = cpu->env_ptr;
2051 target_ulong pc, cs_base;
2052 target_ulong vaddr;
2053 CPUWatchpoint *wp;
2054 uint32_t cpu_flags;
2056 if (cpu->watchpoint_hit) {
2057 /* We re-entered the check after replacing the TB. Now raise
2058 * the debug interrupt so that is will trigger after the
2059 * current instruction. */
2060 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2061 return;
2063 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2064 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2065 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2066 && (wp->flags & flags)) {
2067 if (flags == BP_MEM_READ) {
2068 wp->flags |= BP_WATCHPOINT_HIT_READ;
2069 } else {
2070 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2072 wp->hitaddr = vaddr;
2073 wp->hitattrs = attrs;
2074 if (!cpu->watchpoint_hit) {
2075 if (wp->flags & BP_CPU &&
2076 !cc->debug_check_watchpoint(cpu, wp)) {
2077 wp->flags &= ~BP_WATCHPOINT_HIT;
2078 continue;
2080 cpu->watchpoint_hit = wp;
2082 /* The tb_lock will be reset when cpu_loop_exit or
2083 * cpu_loop_exit_noexc longjmp back into the cpu_exec
2084 * main loop.
2086 tb_lock();
2087 tb_check_watchpoint(cpu);
2088 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2089 cpu->exception_index = EXCP_DEBUG;
2090 cpu_loop_exit(cpu);
2091 } else {
2092 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2093 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2094 cpu_loop_exit_noexc(cpu);
2097 } else {
2098 wp->flags &= ~BP_WATCHPOINT_HIT;
2103 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2104 so these check for a hit then pass through to the normal out-of-line
2105 phys routines. */
2106 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2107 unsigned size, MemTxAttrs attrs)
2109 MemTxResult res;
2110 uint64_t data;
2111 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2112 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2114 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2115 switch (size) {
2116 case 1:
2117 data = address_space_ldub(as, addr, attrs, &res);
2118 break;
2119 case 2:
2120 data = address_space_lduw(as, addr, attrs, &res);
2121 break;
2122 case 4:
2123 data = address_space_ldl(as, addr, attrs, &res);
2124 break;
2125 default: abort();
2127 *pdata = data;
2128 return res;
2131 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2132 uint64_t val, unsigned size,
2133 MemTxAttrs attrs)
2135 MemTxResult res;
2136 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2137 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2139 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2140 switch (size) {
2141 case 1:
2142 address_space_stb(as, addr, val, attrs, &res);
2143 break;
2144 case 2:
2145 address_space_stw(as, addr, val, attrs, &res);
2146 break;
2147 case 4:
2148 address_space_stl(as, addr, val, attrs, &res);
2149 break;
2150 default: abort();
2152 return res;
2155 static const MemoryRegionOps watch_mem_ops = {
2156 .read_with_attrs = watch_mem_read,
2157 .write_with_attrs = watch_mem_write,
2158 .endianness = DEVICE_NATIVE_ENDIAN,
2161 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2162 unsigned len, MemTxAttrs attrs)
2164 subpage_t *subpage = opaque;
2165 uint8_t buf[8];
2166 MemTxResult res;
2168 #if defined(DEBUG_SUBPAGE)
2169 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2170 subpage, len, addr);
2171 #endif
2172 res = address_space_read(subpage->as, addr + subpage->base,
2173 attrs, buf, len);
2174 if (res) {
2175 return res;
2177 switch (len) {
2178 case 1:
2179 *data = ldub_p(buf);
2180 return MEMTX_OK;
2181 case 2:
2182 *data = lduw_p(buf);
2183 return MEMTX_OK;
2184 case 4:
2185 *data = ldl_p(buf);
2186 return MEMTX_OK;
2187 case 8:
2188 *data = ldq_p(buf);
2189 return MEMTX_OK;
2190 default:
2191 abort();
2195 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2196 uint64_t value, unsigned len, MemTxAttrs attrs)
2198 subpage_t *subpage = opaque;
2199 uint8_t buf[8];
2201 #if defined(DEBUG_SUBPAGE)
2202 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2203 " value %"PRIx64"\n",
2204 __func__, subpage, len, addr, value);
2205 #endif
2206 switch (len) {
2207 case 1:
2208 stb_p(buf, value);
2209 break;
2210 case 2:
2211 stw_p(buf, value);
2212 break;
2213 case 4:
2214 stl_p(buf, value);
2215 break;
2216 case 8:
2217 stq_p(buf, value);
2218 break;
2219 default:
2220 abort();
2222 return address_space_write(subpage->as, addr + subpage->base,
2223 attrs, buf, len);
2226 static bool subpage_accepts(void *opaque, hwaddr addr,
2227 unsigned len, bool is_write)
2229 subpage_t *subpage = opaque;
2230 #if defined(DEBUG_SUBPAGE)
2231 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2232 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2233 #endif
2235 return address_space_access_valid(subpage->as, addr + subpage->base,
2236 len, is_write);
2239 static const MemoryRegionOps subpage_ops = {
2240 .read_with_attrs = subpage_read,
2241 .write_with_attrs = subpage_write,
2242 .impl.min_access_size = 1,
2243 .impl.max_access_size = 8,
2244 .valid.min_access_size = 1,
2245 .valid.max_access_size = 8,
2246 .valid.accepts = subpage_accepts,
2247 .endianness = DEVICE_NATIVE_ENDIAN,
2250 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2251 uint16_t section)
2253 int idx, eidx;
2255 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2256 return -1;
2257 idx = SUBPAGE_IDX(start);
2258 eidx = SUBPAGE_IDX(end);
2259 #if defined(DEBUG_SUBPAGE)
2260 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2261 __func__, mmio, start, end, idx, eidx, section);
2262 #endif
2263 for (; idx <= eidx; idx++) {
2264 mmio->sub_section[idx] = section;
2267 return 0;
2270 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2272 subpage_t *mmio;
2274 mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2275 mmio->as = as;
2276 mmio->base = base;
2277 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2278 NULL, TARGET_PAGE_SIZE);
2279 mmio->iomem.subpage = true;
2280 #if defined(DEBUG_SUBPAGE)
2281 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2282 mmio, base, TARGET_PAGE_SIZE);
2283 #endif
2284 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2286 return mmio;
2289 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2290 MemoryRegion *mr)
2292 assert(as);
2293 MemoryRegionSection section = {
2294 .address_space = as,
2295 .mr = mr,
2296 .offset_within_address_space = 0,
2297 .offset_within_region = 0,
2298 .size = int128_2_64(),
2301 return phys_section_add(map, &section);
2304 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2306 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2307 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2308 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2309 MemoryRegionSection *sections = d->map.sections;
2311 return sections[index & ~TARGET_PAGE_MASK].mr;
2314 static void io_mem_init(void)
2316 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2317 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2318 NULL, UINT64_MAX);
2319 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2320 NULL, UINT64_MAX);
2321 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2322 NULL, UINT64_MAX);
2325 static void mem_begin(MemoryListener *listener)
2327 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2328 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2329 uint16_t n;
2331 n = dummy_section(&d->map, as, &io_mem_unassigned);
2332 assert(n == PHYS_SECTION_UNASSIGNED);
2333 n = dummy_section(&d->map, as, &io_mem_notdirty);
2334 assert(n == PHYS_SECTION_NOTDIRTY);
2335 n = dummy_section(&d->map, as, &io_mem_rom);
2336 assert(n == PHYS_SECTION_ROM);
2337 n = dummy_section(&d->map, as, &io_mem_watch);
2338 assert(n == PHYS_SECTION_WATCH);
2340 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2341 d->as = as;
2342 as->next_dispatch = d;
2345 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2347 phys_sections_free(&d->map);
2348 g_free(d);
2351 static void mem_commit(MemoryListener *listener)
2353 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2354 AddressSpaceDispatch *cur = as->dispatch;
2355 AddressSpaceDispatch *next = as->next_dispatch;
2357 phys_page_compact_all(next, next->map.nodes_nb);
2359 atomic_rcu_set(&as->dispatch, next);
2360 if (cur) {
2361 call_rcu(cur, address_space_dispatch_free, rcu);
2365 static void tcg_commit(MemoryListener *listener)
2367 CPUAddressSpace *cpuas;
2368 AddressSpaceDispatch *d;
2370 /* since each CPU stores ram addresses in its TLB cache, we must
2371 reset the modified entries */
2372 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2373 cpu_reloading_memory_map();
2374 /* The CPU and TLB are protected by the iothread lock.
2375 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2376 * may have split the RCU critical section.
2378 d = atomic_rcu_read(&cpuas->as->dispatch);
2379 cpuas->memory_dispatch = d;
2380 tlb_flush(cpuas->cpu, 1);
2383 void address_space_init_dispatch(AddressSpace *as)
2385 as->dispatch = NULL;
2386 as->dispatch_listener = (MemoryListener) {
2387 .begin = mem_begin,
2388 .commit = mem_commit,
2389 .region_add = mem_add,
2390 .region_nop = mem_add,
2391 .priority = 0,
2393 memory_listener_register(&as->dispatch_listener, as);
2396 void address_space_unregister(AddressSpace *as)
2398 memory_listener_unregister(&as->dispatch_listener);
2401 void address_space_destroy_dispatch(AddressSpace *as)
2403 AddressSpaceDispatch *d = as->dispatch;
2405 atomic_rcu_set(&as->dispatch, NULL);
2406 if (d) {
2407 call_rcu(d, address_space_dispatch_free, rcu);
2411 static void memory_map_init(void)
2413 system_memory = g_malloc(sizeof(*system_memory));
2415 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2416 address_space_init(&address_space_memory, system_memory, "memory");
2418 system_io = g_malloc(sizeof(*system_io));
2419 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2420 65536);
2421 address_space_init(&address_space_io, system_io, "I/O");
2424 MemoryRegion *get_system_memory(void)
2426 return system_memory;
2429 MemoryRegion *get_system_io(void)
2431 return system_io;
2434 #endif /* !defined(CONFIG_USER_ONLY) */
2436 /* physical memory access (slow version, mainly for debug) */
2437 #if defined(CONFIG_USER_ONLY)
2438 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2439 uint8_t *buf, int len, int is_write)
2441 int l, flags;
2442 target_ulong page;
2443 void * p;
2445 while (len > 0) {
2446 page = addr & TARGET_PAGE_MASK;
2447 l = (page + TARGET_PAGE_SIZE) - addr;
2448 if (l > len)
2449 l = len;
2450 flags = page_get_flags(page);
2451 if (!(flags & PAGE_VALID))
2452 return -1;
2453 if (is_write) {
2454 if (!(flags & PAGE_WRITE))
2455 return -1;
2456 /* XXX: this code should not depend on lock_user */
2457 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2458 return -1;
2459 memcpy(p, buf, l);
2460 unlock_user(p, addr, l);
2461 } else {
2462 if (!(flags & PAGE_READ))
2463 return -1;
2464 /* XXX: this code should not depend on lock_user */
2465 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2466 return -1;
2467 memcpy(buf, p, l);
2468 unlock_user(p, addr, 0);
2470 len -= l;
2471 buf += l;
2472 addr += l;
2474 return 0;
2477 #else
2479 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2480 hwaddr length)
2482 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2483 addr += memory_region_get_ram_addr(mr);
2485 /* No early return if dirty_log_mask is or becomes 0, because
2486 * cpu_physical_memory_set_dirty_range will still call
2487 * xen_modified_memory.
2489 if (dirty_log_mask) {
2490 dirty_log_mask =
2491 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2493 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2494 tb_lock();
2495 tb_invalidate_phys_range(addr, addr + length);
2496 tb_unlock();
2497 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2499 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2502 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2504 unsigned access_size_max = mr->ops->valid.max_access_size;
2506 /* Regions are assumed to support 1-4 byte accesses unless
2507 otherwise specified. */
2508 if (access_size_max == 0) {
2509 access_size_max = 4;
2512 /* Bound the maximum access by the alignment of the address. */
2513 if (!mr->ops->impl.unaligned) {
2514 unsigned align_size_max = addr & -addr;
2515 if (align_size_max != 0 && align_size_max < access_size_max) {
2516 access_size_max = align_size_max;
2520 /* Don't attempt accesses larger than the maximum. */
2521 if (l > access_size_max) {
2522 l = access_size_max;
2524 l = pow2floor(l);
2526 return l;
2529 static bool prepare_mmio_access(MemoryRegion *mr)
2531 bool unlocked = !qemu_mutex_iothread_locked();
2532 bool release_lock = false;
2534 if (unlocked && mr->global_locking) {
2535 qemu_mutex_lock_iothread();
2536 unlocked = false;
2537 release_lock = true;
2539 if (mr->flush_coalesced_mmio) {
2540 if (unlocked) {
2541 qemu_mutex_lock_iothread();
2543 qemu_flush_coalesced_mmio_buffer();
2544 if (unlocked) {
2545 qemu_mutex_unlock_iothread();
2549 return release_lock;
2552 /* Called within RCU critical section. */
2553 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2554 MemTxAttrs attrs,
2555 const uint8_t *buf,
2556 int len, hwaddr addr1,
2557 hwaddr l, MemoryRegion *mr)
2559 uint8_t *ptr;
2560 uint64_t val;
2561 MemTxResult result = MEMTX_OK;
2562 bool release_lock = false;
2564 for (;;) {
2565 if (!memory_access_is_direct(mr, true)) {
2566 release_lock |= prepare_mmio_access(mr);
2567 l = memory_access_size(mr, l, addr1);
2568 /* XXX: could force current_cpu to NULL to avoid
2569 potential bugs */
2570 switch (l) {
2571 case 8:
2572 /* 64 bit write access */
2573 val = ldq_p(buf);
2574 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2575 attrs);
2576 break;
2577 case 4:
2578 /* 32 bit write access */
2579 val = ldl_p(buf);
2580 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2581 attrs);
2582 break;
2583 case 2:
2584 /* 16 bit write access */
2585 val = lduw_p(buf);
2586 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2587 attrs);
2588 break;
2589 case 1:
2590 /* 8 bit write access */
2591 val = ldub_p(buf);
2592 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2593 attrs);
2594 break;
2595 default:
2596 abort();
2598 } else {
2599 /* RAM case */
2600 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2601 memcpy(ptr, buf, l);
2602 invalidate_and_set_dirty(mr, addr1, l);
2605 if (release_lock) {
2606 qemu_mutex_unlock_iothread();
2607 release_lock = false;
2610 len -= l;
2611 buf += l;
2612 addr += l;
2614 if (!len) {
2615 break;
2618 l = len;
2619 mr = address_space_translate(as, addr, &addr1, &l, true);
2622 return result;
2625 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2626 const uint8_t *buf, int len)
2628 hwaddr l;
2629 hwaddr addr1;
2630 MemoryRegion *mr;
2631 MemTxResult result = MEMTX_OK;
2633 if (len > 0) {
2634 rcu_read_lock();
2635 l = len;
2636 mr = address_space_translate(as, addr, &addr1, &l, true);
2637 result = address_space_write_continue(as, addr, attrs, buf, len,
2638 addr1, l, mr);
2639 rcu_read_unlock();
2642 return result;
2645 /* Called within RCU critical section. */
2646 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2647 MemTxAttrs attrs, uint8_t *buf,
2648 int len, hwaddr addr1, hwaddr l,
2649 MemoryRegion *mr)
2651 uint8_t *ptr;
2652 uint64_t val;
2653 MemTxResult result = MEMTX_OK;
2654 bool release_lock = false;
2656 for (;;) {
2657 if (!memory_access_is_direct(mr, false)) {
2658 /* I/O case */
2659 release_lock |= prepare_mmio_access(mr);
2660 l = memory_access_size(mr, l, addr1);
2661 switch (l) {
2662 case 8:
2663 /* 64 bit read access */
2664 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2665 attrs);
2666 stq_p(buf, val);
2667 break;
2668 case 4:
2669 /* 32 bit read access */
2670 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2671 attrs);
2672 stl_p(buf, val);
2673 break;
2674 case 2:
2675 /* 16 bit read access */
2676 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2677 attrs);
2678 stw_p(buf, val);
2679 break;
2680 case 1:
2681 /* 8 bit read access */
2682 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2683 attrs);
2684 stb_p(buf, val);
2685 break;
2686 default:
2687 abort();
2689 } else {
2690 /* RAM case */
2691 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2692 memcpy(buf, ptr, l);
2695 if (release_lock) {
2696 qemu_mutex_unlock_iothread();
2697 release_lock = false;
2700 len -= l;
2701 buf += l;
2702 addr += l;
2704 if (!len) {
2705 break;
2708 l = len;
2709 mr = address_space_translate(as, addr, &addr1, &l, false);
2712 return result;
2715 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2716 MemTxAttrs attrs, uint8_t *buf, int len)
2718 hwaddr l;
2719 hwaddr addr1;
2720 MemoryRegion *mr;
2721 MemTxResult result = MEMTX_OK;
2723 if (len > 0) {
2724 rcu_read_lock();
2725 l = len;
2726 mr = address_space_translate(as, addr, &addr1, &l, false);
2727 result = address_space_read_continue(as, addr, attrs, buf, len,
2728 addr1, l, mr);
2729 rcu_read_unlock();
2732 return result;
2735 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2736 uint8_t *buf, int len, bool is_write)
2738 if (is_write) {
2739 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2740 } else {
2741 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2745 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2746 int len, int is_write)
2748 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2749 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host icache over the range, no copy */
};
2757 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2758 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2760 hwaddr l;
2761 uint8_t *ptr;
2762 hwaddr addr1;
2763 MemoryRegion *mr;
2765 rcu_read_lock();
2766 while (len > 0) {
2767 l = len;
2768 mr = address_space_translate(as, addr, &addr1, &l, true);
2770 if (!(memory_region_is_ram(mr) ||
2771 memory_region_is_romd(mr))) {
2772 l = memory_access_size(mr, l, addr1);
2773 } else {
2774 /* ROM/RAM case */
2775 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2776 switch (type) {
2777 case WRITE_DATA:
2778 memcpy(ptr, buf, l);
2779 invalidate_and_set_dirty(mr, addr1, l);
2780 break;
2781 case FLUSH_CACHE:
2782 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2783 break;
2786 len -= l;
2787 buf += l;
2788 addr += l;
2790 rcu_read_unlock();
2793 /* used for ROM loading : can write in RAM and ROM */
2794 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2795 const uint8_t *buf, int len)
2797 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2800 void cpu_flush_icache_range(hwaddr start, int len)
2803 * This function should do the same thing as an icache flush that was
2804 * triggered from within the guest. For TCG we are always cache coherent,
2805 * so there is no need to flush anything. For KVM / Xen we need to flush
2806 * the host's instruction cache at least.
2808 if (tcg_enabled()) {
2809 return;
2812 cpu_physical_memory_write_rom_internal(&address_space_memory,
2813 start, NULL, len, FLUSH_CACHE);
2816 typedef struct {
2817 MemoryRegion *mr;
2818 void *buffer;
2819 hwaddr addr;
2820 hwaddr len;
2821 bool in_use;
2822 } BounceBuffer;
2824 static BounceBuffer bounce;
2826 typedef struct MapClient {
2827 QEMUBH *bh;
2828 QLIST_ENTRY(MapClient) link;
2829 } MapClient;
2831 QemuMutex map_client_list_lock;
2832 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2833 = QLIST_HEAD_INITIALIZER(map_client_list);
2835 static void cpu_unregister_map_client_do(MapClient *client)
2837 QLIST_REMOVE(client, link);
2838 g_free(client);
2841 static void cpu_notify_map_clients_locked(void)
2843 MapClient *client;
2845 while (!QLIST_EMPTY(&map_client_list)) {
2846 client = QLIST_FIRST(&map_client_list);
2847 qemu_bh_schedule(client->bh);
2848 cpu_unregister_map_client_do(client);
2852 void cpu_register_map_client(QEMUBH *bh)
2854 MapClient *client = g_malloc(sizeof(*client));
2856 qemu_mutex_lock(&map_client_list_lock);
2857 client->bh = bh;
2858 QLIST_INSERT_HEAD(&map_client_list, client, link);
2859 if (!atomic_read(&bounce.in_use)) {
2860 cpu_notify_map_clients_locked();
2862 qemu_mutex_unlock(&map_client_list_lock);
2865 void cpu_exec_init_all(void)
2867 qemu_mutex_init(&ram_list.mutex);
2868 /* The data structures we set up here depend on knowing the page size,
2869 * so no more changes can be made after this point.
2870 * In an ideal world, nothing we did before we had finished the
2871 * machine setup would care about the target page size, and we could
2872 * do this much later, rather than requiring board models to state
2873 * up front what their requirements are.
2875 finalize_target_page_bits();
2876 io_mem_init();
2877 memory_map_init();
2878 qemu_mutex_init(&map_client_list_lock);
2881 void cpu_unregister_map_client(QEMUBH *bh)
2883 MapClient *client;
2885 qemu_mutex_lock(&map_client_list_lock);
2886 QLIST_FOREACH(client, &map_client_list, link) {
2887 if (client->bh == bh) {
2888 cpu_unregister_map_client_do(client);
2889 break;
2892 qemu_mutex_unlock(&map_client_list_lock);
2895 static void cpu_notify_map_clients(void)
2897 qemu_mutex_lock(&map_client_list_lock);
2898 cpu_notify_map_clients_locked();
2899 qemu_mutex_unlock(&map_client_list_lock);
2902 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2904 MemoryRegion *mr;
2905 hwaddr l, xlat;
2907 rcu_read_lock();
2908 while (len > 0) {
2909 l = len;
2910 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2911 if (!memory_access_is_direct(mr, is_write)) {
2912 l = memory_access_size(mr, l, addr);
2913 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2914 return false;
2918 len -= l;
2919 addr += l;
2921 rcu_read_unlock();
2922 return true;
2925 /* Map a physical memory region into a host virtual address.
2926 * May map a subset of the requested range, given by and returned in *plen.
2927 * May return NULL if resources needed to perform the mapping are exhausted.
2928 * Use only for reads OR writes - not for read-modify-write operations.
2929 * Use cpu_register_map_client() to know when retrying the map operation is
2930 * likely to succeed.
2932 void *address_space_map(AddressSpace *as,
2933 hwaddr addr,
2934 hwaddr *plen,
2935 bool is_write)
2937 hwaddr len = *plen;
2938 hwaddr done = 0;
2939 hwaddr l, xlat, base;
2940 MemoryRegion *mr, *this_mr;
2941 void *ptr;
2943 if (len == 0) {
2944 return NULL;
2947 l = len;
2948 rcu_read_lock();
2949 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2951 if (!memory_access_is_direct(mr, is_write)) {
2952 if (atomic_xchg(&bounce.in_use, true)) {
2953 rcu_read_unlock();
2954 return NULL;
2956 /* Avoid unbounded allocations */
2957 l = MIN(l, TARGET_PAGE_SIZE);
2958 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2959 bounce.addr = addr;
2960 bounce.len = l;
2962 memory_region_ref(mr);
2963 bounce.mr = mr;
2964 if (!is_write) {
2965 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2966 bounce.buffer, l);
2969 rcu_read_unlock();
2970 *plen = l;
2971 return bounce.buffer;
2974 base = xlat;
2976 for (;;) {
2977 len -= l;
2978 addr += l;
2979 done += l;
2980 if (len == 0) {
2981 break;
2984 l = len;
2985 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2986 if (this_mr != mr || xlat != base + done) {
2987 break;
2991 memory_region_ref(mr);
2992 *plen = done;
2993 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2994 rcu_read_unlock();
2996 return ptr;
2999 /* Unmaps a memory region previously mapped by address_space_map().
3000 * Will also mark the memory as dirty if is_write == 1. access_len gives
3001 * the amount of memory that was actually read or written by the caller.
3003 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3004 int is_write, hwaddr access_len)
3006 if (buffer != bounce.buffer) {
3007 MemoryRegion *mr;
3008 ram_addr_t addr1;
3010 mr = memory_region_from_host(buffer, &addr1);
3011 assert(mr != NULL);
3012 if (is_write) {
3013 invalidate_and_set_dirty(mr, addr1, access_len);
3015 if (xen_enabled()) {
3016 xen_invalidate_map_cache_entry(buffer);
3018 memory_region_unref(mr);
3019 return;
3021 if (is_write) {
3022 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3023 bounce.buffer, access_len);
3025 qemu_vfree(bounce.buffer);
3026 bounce.buffer = NULL;
3027 memory_region_unref(bounce.mr);
3028 atomic_mb_set(&bounce.in_use, false);
3029 cpu_notify_map_clients();
3032 void *cpu_physical_memory_map(hwaddr addr,
3033 hwaddr *plen,
3034 int is_write)
3036 return address_space_map(&address_space_memory, addr, plen, is_write);
3039 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3040 int is_write, hwaddr access_len)
3042 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3045 /* warning: addr must be aligned */
3046 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3047 MemTxAttrs attrs,
3048 MemTxResult *result,
3049 enum device_endian endian)
3051 uint8_t *ptr;
3052 uint64_t val;
3053 MemoryRegion *mr;
3054 hwaddr l = 4;
3055 hwaddr addr1;
3056 MemTxResult r;
3057 bool release_lock = false;
3059 rcu_read_lock();
3060 mr = address_space_translate(as, addr, &addr1, &l, false);
3061 if (l < 4 || !memory_access_is_direct(mr, false)) {
3062 release_lock |= prepare_mmio_access(mr);
3064 /* I/O case */
3065 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3066 #if defined(TARGET_WORDS_BIGENDIAN)
3067 if (endian == DEVICE_LITTLE_ENDIAN) {
3068 val = bswap32(val);
3070 #else
3071 if (endian == DEVICE_BIG_ENDIAN) {
3072 val = bswap32(val);
3074 #endif
3075 } else {
3076 /* RAM case */
3077 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3078 switch (endian) {
3079 case DEVICE_LITTLE_ENDIAN:
3080 val = ldl_le_p(ptr);
3081 break;
3082 case DEVICE_BIG_ENDIAN:
3083 val = ldl_be_p(ptr);
3084 break;
3085 default:
3086 val = ldl_p(ptr);
3087 break;
3089 r = MEMTX_OK;
3091 if (result) {
3092 *result = r;
3094 if (release_lock) {
3095 qemu_mutex_unlock_iothread();
3097 rcu_read_unlock();
3098 return val;
3101 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3102 MemTxAttrs attrs, MemTxResult *result)
3104 return address_space_ldl_internal(as, addr, attrs, result,
3105 DEVICE_NATIVE_ENDIAN);
3108 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3109 MemTxAttrs attrs, MemTxResult *result)
3111 return address_space_ldl_internal(as, addr, attrs, result,
3112 DEVICE_LITTLE_ENDIAN);
3115 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3116 MemTxAttrs attrs, MemTxResult *result)
3118 return address_space_ldl_internal(as, addr, attrs, result,
3119 DEVICE_BIG_ENDIAN);
3122 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3124 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3127 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3129 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3132 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3134 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3137 /* warning: addr must be aligned */
3138 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3139 MemTxAttrs attrs,
3140 MemTxResult *result,
3141 enum device_endian endian)
3143 uint8_t *ptr;
3144 uint64_t val;
3145 MemoryRegion *mr;
3146 hwaddr l = 8;
3147 hwaddr addr1;
3148 MemTxResult r;
3149 bool release_lock = false;
3151 rcu_read_lock();
3152 mr = address_space_translate(as, addr, &addr1, &l,
3153 false);
3154 if (l < 8 || !memory_access_is_direct(mr, false)) {
3155 release_lock |= prepare_mmio_access(mr);
3157 /* I/O case */
3158 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3159 #if defined(TARGET_WORDS_BIGENDIAN)
3160 if (endian == DEVICE_LITTLE_ENDIAN) {
3161 val = bswap64(val);
3163 #else
3164 if (endian == DEVICE_BIG_ENDIAN) {
3165 val = bswap64(val);
3167 #endif
3168 } else {
3169 /* RAM case */
3170 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3171 switch (endian) {
3172 case DEVICE_LITTLE_ENDIAN:
3173 val = ldq_le_p(ptr);
3174 break;
3175 case DEVICE_BIG_ENDIAN:
3176 val = ldq_be_p(ptr);
3177 break;
3178 default:
3179 val = ldq_p(ptr);
3180 break;
3182 r = MEMTX_OK;
3184 if (result) {
3185 *result = r;
3187 if (release_lock) {
3188 qemu_mutex_unlock_iothread();
3190 rcu_read_unlock();
3191 return val;
3194 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3195 MemTxAttrs attrs, MemTxResult *result)
3197 return address_space_ldq_internal(as, addr, attrs, result,
3198 DEVICE_NATIVE_ENDIAN);
3201 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3202 MemTxAttrs attrs, MemTxResult *result)
3204 return address_space_ldq_internal(as, addr, attrs, result,
3205 DEVICE_LITTLE_ENDIAN);
3208 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3209 MemTxAttrs attrs, MemTxResult *result)
3211 return address_space_ldq_internal(as, addr, attrs, result,
3212 DEVICE_BIG_ENDIAN);
3215 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3217 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3220 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3222 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3225 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3227 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3230 /* XXX: optimize */
3231 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3232 MemTxAttrs attrs, MemTxResult *result)
3234 uint8_t val;
3235 MemTxResult r;
3237 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3238 if (result) {
3239 *result = r;
3241 return val;
3244 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3246 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3249 /* warning: addr must be aligned */
3250 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3251 hwaddr addr,
3252 MemTxAttrs attrs,
3253 MemTxResult *result,
3254 enum device_endian endian)
3256 uint8_t *ptr;
3257 uint64_t val;
3258 MemoryRegion *mr;
3259 hwaddr l = 2;
3260 hwaddr addr1;
3261 MemTxResult r;
3262 bool release_lock = false;
3264 rcu_read_lock();
3265 mr = address_space_translate(as, addr, &addr1, &l,
3266 false);
3267 if (l < 2 || !memory_access_is_direct(mr, false)) {
3268 release_lock |= prepare_mmio_access(mr);
3270 /* I/O case */
3271 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3272 #if defined(TARGET_WORDS_BIGENDIAN)
3273 if (endian == DEVICE_LITTLE_ENDIAN) {
3274 val = bswap16(val);
3276 #else
3277 if (endian == DEVICE_BIG_ENDIAN) {
3278 val = bswap16(val);
3280 #endif
3281 } else {
3282 /* RAM case */
3283 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3284 switch (endian) {
3285 case DEVICE_LITTLE_ENDIAN:
3286 val = lduw_le_p(ptr);
3287 break;
3288 case DEVICE_BIG_ENDIAN:
3289 val = lduw_be_p(ptr);
3290 break;
3291 default:
3292 val = lduw_p(ptr);
3293 break;
3295 r = MEMTX_OK;
3297 if (result) {
3298 *result = r;
3300 if (release_lock) {
3301 qemu_mutex_unlock_iothread();
3303 rcu_read_unlock();
3304 return val;
3307 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3308 MemTxAttrs attrs, MemTxResult *result)
3310 return address_space_lduw_internal(as, addr, attrs, result,
3311 DEVICE_NATIVE_ENDIAN);
3314 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3315 MemTxAttrs attrs, MemTxResult *result)
3317 return address_space_lduw_internal(as, addr, attrs, result,
3318 DEVICE_LITTLE_ENDIAN);
3321 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3322 MemTxAttrs attrs, MemTxResult *result)
3324 return address_space_lduw_internal(as, addr, attrs, result,
3325 DEVICE_BIG_ENDIAN);
3328 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3330 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3333 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3335 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3338 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3340 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3343 /* warning: addr must be aligned. The ram page is not masked as dirty
3344 and the code inside is not invalidated. It is useful if the dirty
3345 bits are used to track modified PTEs */
3346 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3347 MemTxAttrs attrs, MemTxResult *result)
3349 uint8_t *ptr;
3350 MemoryRegion *mr;
3351 hwaddr l = 4;
3352 hwaddr addr1;
3353 MemTxResult r;
3354 uint8_t dirty_log_mask;
3355 bool release_lock = false;
3357 rcu_read_lock();
3358 mr = address_space_translate(as, addr, &addr1, &l,
3359 true);
3360 if (l < 4 || !memory_access_is_direct(mr, true)) {
3361 release_lock |= prepare_mmio_access(mr);
3363 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3364 } else {
3365 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3366 stl_p(ptr, val);
3368 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3369 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3370 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3371 4, dirty_log_mask);
3372 r = MEMTX_OK;
3374 if (result) {
3375 *result = r;
3377 if (release_lock) {
3378 qemu_mutex_unlock_iothread();
3380 rcu_read_unlock();
3383 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3385 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3388 /* warning: addr must be aligned */
3389 static inline void address_space_stl_internal(AddressSpace *as,
3390 hwaddr addr, uint32_t val,
3391 MemTxAttrs attrs,
3392 MemTxResult *result,
3393 enum device_endian endian)
3395 uint8_t *ptr;
3396 MemoryRegion *mr;
3397 hwaddr l = 4;
3398 hwaddr addr1;
3399 MemTxResult r;
3400 bool release_lock = false;
3402 rcu_read_lock();
3403 mr = address_space_translate(as, addr, &addr1, &l,
3404 true);
3405 if (l < 4 || !memory_access_is_direct(mr, true)) {
3406 release_lock |= prepare_mmio_access(mr);
3408 #if defined(TARGET_WORDS_BIGENDIAN)
3409 if (endian == DEVICE_LITTLE_ENDIAN) {
3410 val = bswap32(val);
3412 #else
3413 if (endian == DEVICE_BIG_ENDIAN) {
3414 val = bswap32(val);
3416 #endif
3417 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3418 } else {
3419 /* RAM case */
3420 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3421 switch (endian) {
3422 case DEVICE_LITTLE_ENDIAN:
3423 stl_le_p(ptr, val);
3424 break;
3425 case DEVICE_BIG_ENDIAN:
3426 stl_be_p(ptr, val);
3427 break;
3428 default:
3429 stl_p(ptr, val);
3430 break;
3432 invalidate_and_set_dirty(mr, addr1, 4);
3433 r = MEMTX_OK;
3435 if (result) {
3436 *result = r;
3438 if (release_lock) {
3439 qemu_mutex_unlock_iothread();
3441 rcu_read_unlock();
3444 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3445 MemTxAttrs attrs, MemTxResult *result)
3447 address_space_stl_internal(as, addr, val, attrs, result,
3448 DEVICE_NATIVE_ENDIAN);
3451 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3452 MemTxAttrs attrs, MemTxResult *result)
3454 address_space_stl_internal(as, addr, val, attrs, result,
3455 DEVICE_LITTLE_ENDIAN);
3458 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3459 MemTxAttrs attrs, MemTxResult *result)
3461 address_space_stl_internal(as, addr, val, attrs, result,
3462 DEVICE_BIG_ENDIAN);
3465 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3467 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3470 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3472 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3475 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3477 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3480 /* XXX: optimize */
3481 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3482 MemTxAttrs attrs, MemTxResult *result)
3484 uint8_t v = val;
3485 MemTxResult r;
3487 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3488 if (result) {
3489 *result = r;
3493 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3495 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3498 /* warning: addr must be aligned */
3499 static inline void address_space_stw_internal(AddressSpace *as,
3500 hwaddr addr, uint32_t val,
3501 MemTxAttrs attrs,
3502 MemTxResult *result,
3503 enum device_endian endian)
3505 uint8_t *ptr;
3506 MemoryRegion *mr;
3507 hwaddr l = 2;
3508 hwaddr addr1;
3509 MemTxResult r;
3510 bool release_lock = false;
3512 rcu_read_lock();
3513 mr = address_space_translate(as, addr, &addr1, &l, true);
3514 if (l < 2 || !memory_access_is_direct(mr, true)) {
3515 release_lock |= prepare_mmio_access(mr);
3517 #if defined(TARGET_WORDS_BIGENDIAN)
3518 if (endian == DEVICE_LITTLE_ENDIAN) {
3519 val = bswap16(val);
3521 #else
3522 if (endian == DEVICE_BIG_ENDIAN) {
3523 val = bswap16(val);
3525 #endif
3526 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3527 } else {
3528 /* RAM case */
3529 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3530 switch (endian) {
3531 case DEVICE_LITTLE_ENDIAN:
3532 stw_le_p(ptr, val);
3533 break;
3534 case DEVICE_BIG_ENDIAN:
3535 stw_be_p(ptr, val);
3536 break;
3537 default:
3538 stw_p(ptr, val);
3539 break;
3541 invalidate_and_set_dirty(mr, addr1, 2);
3542 r = MEMTX_OK;
3544 if (result) {
3545 *result = r;
3547 if (release_lock) {
3548 qemu_mutex_unlock_iothread();
3550 rcu_read_unlock();
3553 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3554 MemTxAttrs attrs, MemTxResult *result)
3556 address_space_stw_internal(as, addr, val, attrs, result,
3557 DEVICE_NATIVE_ENDIAN);
3560 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3561 MemTxAttrs attrs, MemTxResult *result)
3563 address_space_stw_internal(as, addr, val, attrs, result,
3564 DEVICE_LITTLE_ENDIAN);
3567 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3568 MemTxAttrs attrs, MemTxResult *result)
3570 address_space_stw_internal(as, addr, val, attrs, result,
3571 DEVICE_BIG_ENDIAN);
3574 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3576 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3579 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3581 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3584 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3586 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3589 /* XXX: optimize */
3590 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3591 MemTxAttrs attrs, MemTxResult *result)
3593 MemTxResult r;
3594 val = tswap64(val);
3595 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3596 if (result) {
3597 *result = r;
3601 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3602 MemTxAttrs attrs, MemTxResult *result)
3604 MemTxResult r;
3605 val = cpu_to_le64(val);
3606 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3607 if (result) {
3608 *result = r;
3611 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3612 MemTxAttrs attrs, MemTxResult *result)
3614 MemTxResult r;
3615 val = cpu_to_be64(val);
3616 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3617 if (result) {
3618 *result = r;
3622 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3624 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3627 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3629 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3632 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3634 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3637 /* virtual memory access for debug (includes writing to ROM) */
3638 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3639 uint8_t *buf, int len, int is_write)
3641 int l;
3642 hwaddr phys_addr;
3643 target_ulong page;
3645 while (len > 0) {
3646 int asidx;
3647 MemTxAttrs attrs;
3649 page = addr & TARGET_PAGE_MASK;
3650 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3651 asidx = cpu_asidx_from_attrs(cpu, attrs);
3652 /* if no physical page mapped, return an error */
3653 if (phys_addr == -1)
3654 return -1;
3655 l = (page + TARGET_PAGE_SIZE) - addr;
3656 if (l > len)
3657 l = len;
3658 phys_addr += (addr & ~TARGET_PAGE_MASK);
3659 if (is_write) {
3660 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3661 phys_addr, buf, l);
3662 } else {
3663 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3664 MEMTXATTRS_UNSPECIFIED,
3665 buf, l, 0);
3667 len -= l;
3668 buf += l;
3669 addr += l;
3671 return 0;
3675 * Allows code that needs to deal with migration bitmaps etc to still be built
3676 * target independent.
3678 size_t qemu_target_page_bits(void)
3680 return TARGET_PAGE_BITS;
3683 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time, false
 * otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
    bool big_endian;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#else
    big_endian = false;
#endif
    return big_endian;
}
3699 #ifndef CONFIG_USER_ONLY
3700 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3702 MemoryRegion*mr;
3703 hwaddr l = 1;
3704 bool res;
3706 rcu_read_lock();
3707 mr = address_space_translate(&address_space_memory,
3708 phys_addr, &phys_addr, &l, false);
3710 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3711 rcu_read_unlock();
3712 return res;
3715 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3717 RAMBlock *block;
3718 int ret = 0;
3720 rcu_read_lock();
3721 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3722 ret = func(block->idstr, block->host, block->offset,
3723 block->used_length, opaque);
3724 if (ret) {
3725 break;
3728 rcu_read_unlock();
3729 return ret;
3731 #endif