nbd: Improve server handling of shutdown requests
[qemu.git] / exec.c
blobf3c2770d54e7e43f31f4b70773c88444b96a4b45
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include "qemu.h"
40 #else /* !CONFIG_USER_ONLY */
41 #include "hw/hw.h"
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57 #include "exec/log.h"
59 #include "migration/vmstate.h"
61 #include "qemu/range.h"
62 #ifndef _WIN32
63 #include "qemu/mmap-alloc.h"
64 #endif
66 //#define DEBUG_SUBPAGE
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock.
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots.
92 #define RAM_RESIZEABLE (1 << 2)
94 #endif
96 #ifdef TARGET_PAGE_BITS_VARY
/* Currently selected target page size, as a bit count; 0 means "not set". */
int target_page_bits;
/* Set once the page size has been committed and may no longer shrink. */
bool target_page_bits_decided;
99 #endif
101 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
102 /* current CPU in the current thread. It is only valid inside
103 cpu_exec() */
104 __thread CPUState *current_cpu;
105 /* 0 = Do not count executed instructions.
106 1 = Precise instruction counting.
107 2 = Adaptive rate instruction counting. */
108 int use_icount;
/*
 * Ask for a target page size of @bits bits.
 *
 * The target page size is the lowest common denominator for all the
 * CPUs in the system, so it can only ever shrink, and it cannot shrink
 * any more once a size has been committed to.
 *
 * Returns false if a smaller size was requested after the size was
 * decided, true otherwise.  Without TARGET_PAGE_BITS_VARY this is a
 * no-op that always returns true.
 */
bool set_preferred_target_page_bits(int bits)
{
#ifdef TARGET_PAGE_BITS_VARY
    assert(bits >= TARGET_PAGE_BITS_MIN);

    /* Already set and already small enough: nothing to change. */
    if (target_page_bits != 0 && target_page_bits <= bits) {
        return true;
    }
    /* A smaller page is wanted, but it is too late to change it. */
    if (target_page_bits_decided) {
        return false;
    }
    target_page_bits = bits;
#endif
    return true;
}
129 #if !defined(CONFIG_USER_ONLY)
/*
 * Commit to the target page size: fall back to TARGET_PAGE_BITS_MIN if
 * nothing was requested, then mark the size as decided.  Does nothing
 * unless TARGET_PAGE_BITS_VARY is defined.
 */
static void finalize_target_page_bits(void)
{
#ifdef TARGET_PAGE_BITS_VARY
    if (!target_page_bits) {
        target_page_bits = TARGET_PAGE_BITS_MIN;
    }
    target_page_bits_decided = true;
#endif
}
/* One slot of the multi-level physical page map, packed into 32 bits. */
typedef struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
} PhysPageEntry;
150 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
152 /* Size of the L2 (and L3, etc) page tables. */
153 #define ADDR_SPACE_BITS 64
155 #define P_L2_BITS 9
156 #define P_L2_SIZE (1 << P_L2_BITS)
158 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
160 typedef PhysPageEntry Node[P_L2_SIZE];
162 typedef struct PhysPageMap {
163 struct rcu_head rcu;
165 unsigned sections_nb;
166 unsigned sections_nb_alloc;
167 unsigned nodes_nb;
168 unsigned nodes_nb_alloc;
169 Node *nodes;
170 MemoryRegionSection *sections;
171 } PhysPageMap;
173 struct AddressSpaceDispatch {
174 struct rcu_head rcu;
176 MemoryRegionSection *mru_section;
177 /* This is a multi-level map on the physical address space.
178 * The bottom level has pointers to MemoryRegionSections.
180 PhysPageEntry phys_map;
181 PhysPageMap map;
182 AddressSpace *as;
185 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
186 typedef struct subpage_t {
187 MemoryRegion iomem;
188 AddressSpace *as;
189 hwaddr base;
190 uint16_t sub_section[];
191 } subpage_t;
193 #define PHYS_SECTION_UNASSIGNED 0
194 #define PHYS_SECTION_NOTDIRTY 1
195 #define PHYS_SECTION_ROM 2
196 #define PHYS_SECTION_WATCH 3
198 static void io_mem_init(void);
199 static void memory_map_init(void);
200 static void tcg_commit(MemoryListener *listener);
202 static MemoryRegion io_mem_watch;
205 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
206 * @cpu: the CPU whose AddressSpace this is
207 * @as: the AddressSpace itself
208 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
209 * @tcg_as_listener: listener for tracking changes to the AddressSpace
211 struct CPUAddressSpace {
212 CPUState *cpu;
213 AddressSpace *as;
214 struct AddressSpaceDispatch *memory_dispatch;
215 MemoryListener tcg_as_listener;
218 #endif
220 #if !defined(CONFIG_USER_ONLY)
222 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
224 static unsigned alloc_hint = 16;
225 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
226 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
227 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
228 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
229 alloc_hint = map->nodes_nb_alloc;
233 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
235 unsigned i;
236 uint32_t ret;
237 PhysPageEntry e;
238 PhysPageEntry *p;
240 ret = map->nodes_nb++;
241 p = map->nodes[ret];
242 assert(ret != PHYS_MAP_NODE_NIL);
243 assert(ret != map->nodes_nb_alloc);
245 e.skip = leaf ? 0 : 1;
246 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
247 for (i = 0; i < P_L2_SIZE; ++i) {
248 memcpy(&p[i], &e, sizeof(e));
250 return ret;
253 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
254 hwaddr *index, hwaddr *nb, uint16_t leaf,
255 int level)
257 PhysPageEntry *p;
258 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
260 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
261 lp->ptr = phys_map_node_alloc(map, level == 0);
263 p = map->nodes[lp->ptr];
264 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
266 while (*nb && lp < &p[P_L2_SIZE]) {
267 if ((*index & (step - 1)) == 0 && *nb >= step) {
268 lp->skip = 0;
269 lp->ptr = leaf;
270 *index += step;
271 *nb -= step;
272 } else {
273 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
275 ++lp;
279 static void phys_page_set(AddressSpaceDispatch *d,
280 hwaddr index, hwaddr nb,
281 uint16_t leaf)
283 /* Wildly overreserve - it doesn't matter much. */
284 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
286 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
289 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
290 * and update our entry so we can skip it and go directly to the destination.
292 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
294 unsigned valid_ptr = P_L2_SIZE;
295 int valid = 0;
296 PhysPageEntry *p;
297 int i;
299 if (lp->ptr == PHYS_MAP_NODE_NIL) {
300 return;
303 p = nodes[lp->ptr];
304 for (i = 0; i < P_L2_SIZE; i++) {
305 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
306 continue;
309 valid_ptr = i;
310 valid++;
311 if (p[i].skip) {
312 phys_page_compact(&p[i], nodes);
316 /* We can only compress if there's only one child. */
317 if (valid != 1) {
318 return;
321 assert(valid_ptr < P_L2_SIZE);
323 /* Don't compress if it won't fit in the # of bits we have. */
324 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
325 return;
328 lp->ptr = p[valid_ptr].ptr;
329 if (!p[valid_ptr].skip) {
330 /* If our only child is a leaf, make this a leaf. */
331 /* By design, we should have made this node a leaf to begin with so we
332 * should never reach here.
333 * But since it's so simple to handle this, let's do it just in case we
334 * change this rule.
336 lp->skip = 0;
337 } else {
338 lp->skip += p[valid_ptr].skip;
342 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
344 if (d->phys_map.skip) {
345 phys_page_compact(&d->phys_map, d->map.nodes);
349 static inline bool section_covers_addr(const MemoryRegionSection *section,
350 hwaddr addr)
352 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
353 * the section must cover the entire address space.
355 return int128_gethi(section->size) ||
356 range_covers_byte(section->offset_within_address_space,
357 int128_getlo(section->size), addr);
360 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
361 Node *nodes, MemoryRegionSection *sections)
363 PhysPageEntry *p;
364 hwaddr index = addr >> TARGET_PAGE_BITS;
365 int i;
367 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
368 if (lp.ptr == PHYS_MAP_NODE_NIL) {
369 return &sections[PHYS_SECTION_UNASSIGNED];
371 p = nodes[lp.ptr];
372 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
375 if (section_covers_addr(&sections[lp.ptr], addr)) {
376 return &sections[lp.ptr];
377 } else {
378 return &sections[PHYS_SECTION_UNASSIGNED];
382 bool memory_region_is_unassigned(MemoryRegion *mr)
384 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
385 && mr != &io_mem_watch;
388 /* Called from RCU critical section */
389 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
390 hwaddr addr,
391 bool resolve_subpage)
393 MemoryRegionSection *section = atomic_read(&d->mru_section);
394 subpage_t *subpage;
395 bool update;
397 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
398 section_covers_addr(section, addr)) {
399 update = false;
400 } else {
401 section = phys_page_find(d->phys_map, addr, d->map.nodes,
402 d->map.sections);
403 update = true;
405 if (resolve_subpage && section->mr->subpage) {
406 subpage = container_of(section->mr, subpage_t, iomem);
407 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
409 if (update) {
410 atomic_set(&d->mru_section, section);
412 return section;
415 /* Called from RCU critical section */
416 static MemoryRegionSection *
417 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
418 hwaddr *plen, bool resolve_subpage)
420 MemoryRegionSection *section;
421 MemoryRegion *mr;
422 Int128 diff;
424 section = address_space_lookup_region(d, addr, resolve_subpage);
425 /* Compute offset within MemoryRegionSection */
426 addr -= section->offset_within_address_space;
428 /* Compute offset within MemoryRegion */
429 *xlat = addr + section->offset_within_region;
431 mr = section->mr;
433 /* MMIO registers can be expected to perform full-width accesses based only
434 * on their address, without considering adjacent registers that could
435 * decode to completely different MemoryRegions. When such registers
436 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
437 * regions overlap wildly. For this reason we cannot clamp the accesses
438 * here.
440 * If the length is small (as is the case for address_space_ldl/stl),
441 * everything works fine. If the incoming length is large, however,
442 * the caller really has to do the clamping through memory_access_size.
444 if (memory_region_is_ram(mr)) {
445 diff = int128_sub(section->size, int128_make64(addr));
446 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
448 return section;
451 /* Called from RCU critical section */
452 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
453 hwaddr *xlat, hwaddr *plen,
454 bool is_write)
456 IOMMUTLBEntry iotlb;
457 MemoryRegionSection *section;
458 MemoryRegion *mr;
460 for (;;) {
461 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
462 section = address_space_translate_internal(d, addr, &addr, plen, true);
463 mr = section->mr;
465 if (!mr->iommu_ops) {
466 break;
469 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
470 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
471 | (addr & iotlb.addr_mask));
472 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
473 if (!(iotlb.perm & (1 << is_write))) {
474 mr = &io_mem_unassigned;
475 break;
478 as = iotlb.target_as;
481 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
482 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
483 *plen = MIN(page, *plen);
486 *xlat = addr;
487 return mr;
490 /* Called from RCU critical section */
491 MemoryRegionSection *
492 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
493 hwaddr *xlat, hwaddr *plen)
495 MemoryRegionSection *section;
496 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
498 section = address_space_translate_internal(d, addr, xlat, plen, false);
500 assert(!section->mr->iommu_ops);
501 return section;
503 #endif
505 #if !defined(CONFIG_USER_ONLY)
507 static int cpu_common_post_load(void *opaque, int version_id)
509 CPUState *cpu = opaque;
511 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
512 version_id is increased. */
513 cpu->interrupt_request &= ~0x01;
514 tlb_flush(cpu, 1);
516 return 0;
519 static int cpu_common_pre_load(void *opaque)
521 CPUState *cpu = opaque;
523 cpu->exception_index = -1;
525 return 0;
528 static bool cpu_common_exception_index_needed(void *opaque)
530 CPUState *cpu = opaque;
532 return tcg_enabled() && cpu->exception_index != -1;
535 static const VMStateDescription vmstate_cpu_common_exception_index = {
536 .name = "cpu_common/exception_index",
537 .version_id = 1,
538 .minimum_version_id = 1,
539 .needed = cpu_common_exception_index_needed,
540 .fields = (VMStateField[]) {
541 VMSTATE_INT32(exception_index, CPUState),
542 VMSTATE_END_OF_LIST()
546 static bool cpu_common_crash_occurred_needed(void *opaque)
548 CPUState *cpu = opaque;
550 return cpu->crash_occurred;
553 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
554 .name = "cpu_common/crash_occurred",
555 .version_id = 1,
556 .minimum_version_id = 1,
557 .needed = cpu_common_crash_occurred_needed,
558 .fields = (VMStateField[]) {
559 VMSTATE_BOOL(crash_occurred, CPUState),
560 VMSTATE_END_OF_LIST()
564 const VMStateDescription vmstate_cpu_common = {
565 .name = "cpu_common",
566 .version_id = 1,
567 .minimum_version_id = 1,
568 .pre_load = cpu_common_pre_load,
569 .post_load = cpu_common_post_load,
570 .fields = (VMStateField[]) {
571 VMSTATE_UINT32(halted, CPUState),
572 VMSTATE_UINT32(interrupt_request, CPUState),
573 VMSTATE_END_OF_LIST()
575 .subsections = (const VMStateDescription*[]) {
576 &vmstate_cpu_common_exception_index,
577 &vmstate_cpu_common_crash_occurred,
578 NULL
582 #endif
584 CPUState *qemu_get_cpu(int index)
586 CPUState *cpu;
588 CPU_FOREACH(cpu) {
589 if (cpu->cpu_index == index) {
590 return cpu;
594 return NULL;
597 #if !defined(CONFIG_USER_ONLY)
598 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
600 CPUAddressSpace *newas;
602 /* Target code should have set num_ases before calling us */
603 assert(asidx < cpu->num_ases);
605 if (asidx == 0) {
606 /* address space 0 gets the convenience alias */
607 cpu->as = as;
610 /* KVM cannot currently support multiple address spaces. */
611 assert(asidx == 0 || !kvm_enabled());
613 if (!cpu->cpu_ases) {
614 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
617 newas = &cpu->cpu_ases[asidx];
618 newas->cpu = cpu;
619 newas->as = as;
620 if (tcg_enabled()) {
621 newas->tcg_as_listener.commit = tcg_commit;
622 memory_listener_register(&newas->tcg_as_listener, as);
626 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
628 /* Return the AddressSpace corresponding to the specified index */
629 return cpu->cpu_ases[asidx].as;
631 #endif
633 void cpu_exec_unrealizefn(CPUState *cpu)
635 CPUClass *cc = CPU_GET_CLASS(cpu);
637 cpu_list_remove(cpu);
639 if (cc->vmsd != NULL) {
640 vmstate_unregister(NULL, cc->vmsd, cpu);
642 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
643 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
647 void cpu_exec_initfn(CPUState *cpu)
649 cpu->as = NULL;
650 cpu->num_ases = 0;
652 #ifndef CONFIG_USER_ONLY
653 cpu->thread_id = qemu_get_thread_id();
655 /* This is a softmmu CPU object, so create a property for it
656 * so users can wire up its memory. (This can't go in qom/cpu.c
657 * because that file is compiled only once for both user-mode
658 * and system builds.) The default if no link is set up is to use
659 * the system address space.
661 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
662 (Object **)&cpu->memory,
663 qdev_prop_allow_set_link_before_realize,
664 OBJ_PROP_LINK_UNREF_ON_RELEASE,
665 &error_abort);
666 cpu->memory = system_memory;
667 object_ref(OBJECT(cpu->memory));
668 #endif
671 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
673 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
675 cpu_list_add(cpu);
677 #ifndef CONFIG_USER_ONLY
678 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
679 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
681 if (cc->vmsd != NULL) {
682 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
684 #endif
687 #if defined(CONFIG_USER_ONLY)
688 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
690 mmap_lock();
691 tb_lock();
692 tb_invalidate_phys_page_range(pc, pc + 1, 0);
693 tb_unlock();
694 mmap_unlock();
696 #else
697 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
699 MemTxAttrs attrs;
700 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
701 int asidx = cpu_asidx_from_attrs(cpu, attrs);
702 if (phys != -1) {
703 /* Locks grabbed by tb_invalidate_phys_addr */
704 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
705 phys | (pc & ~TARGET_PAGE_MASK));
708 #endif
710 #if defined(CONFIG_USER_ONLY)
711 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
716 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
717 int flags)
719 return -ENOSYS;
722 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
726 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
727 int flags, CPUWatchpoint **watchpoint)
729 return -ENOSYS;
731 #else
732 /* Add a watchpoint. */
733 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
734 int flags, CPUWatchpoint **watchpoint)
736 CPUWatchpoint *wp;
738 /* forbid ranges which are empty or run off the end of the address space */
739 if (len == 0 || (addr + len - 1) < addr) {
740 error_report("tried to set invalid watchpoint at %"
741 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
742 return -EINVAL;
744 wp = g_malloc(sizeof(*wp));
746 wp->vaddr = addr;
747 wp->len = len;
748 wp->flags = flags;
750 /* keep all GDB-injected watchpoints in front */
751 if (flags & BP_GDB) {
752 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
753 } else {
754 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
757 tlb_flush_page(cpu, addr);
759 if (watchpoint)
760 *watchpoint = wp;
761 return 0;
764 /* Remove a specific watchpoint. */
765 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
766 int flags)
768 CPUWatchpoint *wp;
770 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
771 if (addr == wp->vaddr && len == wp->len
772 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
773 cpu_watchpoint_remove_by_ref(cpu, wp);
774 return 0;
777 return -ENOENT;
780 /* Remove a specific watchpoint by reference. */
781 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
783 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
785 tlb_flush_page(cpu, watchpoint->vaddr);
787 g_free(watchpoint);
790 /* Remove all matching watchpoints. */
791 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
793 CPUWatchpoint *wp, *next;
795 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
796 if (wp->flags & mask) {
797 cpu_watchpoint_remove_by_ref(cpu, wp);
802 /* Return true if this watchpoint address matches the specified
803 * access (ie the address range covered by the watchpoint overlaps
804 * partially or completely with the address range covered by the
805 * access).
807 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
808 vaddr addr,
809 vaddr len)
811 /* We know the lengths are non-zero, but a little caution is
812 * required to avoid errors in the case where the range ends
813 * exactly at the top of the address space and so addr + len
814 * wraps round to zero.
816 vaddr wpend = wp->vaddr + wp->len - 1;
817 vaddr addrend = addr + len - 1;
819 return !(addr > wpend || wp->vaddr > addrend);
822 #endif
824 /* Add a breakpoint. */
825 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
826 CPUBreakpoint **breakpoint)
828 CPUBreakpoint *bp;
830 bp = g_malloc(sizeof(*bp));
832 bp->pc = pc;
833 bp->flags = flags;
835 /* keep all GDB-injected breakpoints in front */
836 if (flags & BP_GDB) {
837 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
838 } else {
839 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
842 breakpoint_invalidate(cpu, pc);
844 if (breakpoint) {
845 *breakpoint = bp;
847 return 0;
850 /* Remove a specific breakpoint. */
851 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
853 CPUBreakpoint *bp;
855 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
856 if (bp->pc == pc && bp->flags == flags) {
857 cpu_breakpoint_remove_by_ref(cpu, bp);
858 return 0;
861 return -ENOENT;
864 /* Remove a specific breakpoint by reference. */
865 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
867 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
869 breakpoint_invalidate(cpu, breakpoint->pc);
871 g_free(breakpoint);
874 /* Remove all matching breakpoints. */
875 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
877 CPUBreakpoint *bp, *next;
879 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
880 if (bp->flags & mask) {
881 cpu_breakpoint_remove_by_ref(cpu, bp);
886 /* enable or disable single step mode. EXCP_DEBUG is returned by the
887 CPU loop after each instruction */
888 void cpu_single_step(CPUState *cpu, int enabled)
890 if (cpu->singlestep_enabled != enabled) {
891 cpu->singlestep_enabled = enabled;
892 if (kvm_enabled()) {
893 kvm_update_guest_debug(cpu, 0);
894 } else {
895 /* must flush all the translated code to avoid inconsistencies */
896 /* XXX: only flush what is necessary */
897 tb_flush(cpu);
902 void cpu_abort(CPUState *cpu, const char *fmt, ...)
904 va_list ap;
905 va_list ap2;
907 va_start(ap, fmt);
908 va_copy(ap2, ap);
909 fprintf(stderr, "qemu: fatal: ");
910 vfprintf(stderr, fmt, ap);
911 fprintf(stderr, "\n");
912 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
913 if (qemu_log_separate()) {
914 qemu_log("qemu: fatal: ");
915 qemu_log_vprintf(fmt, ap2);
916 qemu_log("\n");
917 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
918 qemu_log_flush();
919 qemu_log_close();
921 va_end(ap2);
922 va_end(ap);
923 replay_finish();
924 #if defined(CONFIG_USER_ONLY)
926 struct sigaction act;
927 sigfillset(&act.sa_mask);
928 act.sa_handler = SIG_DFL;
929 sigaction(SIGABRT, &act, NULL);
931 #endif
932 abort();
935 #if !defined(CONFIG_USER_ONLY)
936 /* Called from RCU critical section */
937 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
939 RAMBlock *block;
941 block = atomic_rcu_read(&ram_list.mru_block);
942 if (block && addr - block->offset < block->max_length) {
943 return block;
945 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
946 if (addr - block->offset < block->max_length) {
947 goto found;
951 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
952 abort();
954 found:
955 /* It is safe to write mru_block outside the iothread lock. This
956 * is what happens:
958 * mru_block = xxx
959 * rcu_read_unlock()
960 * xxx removed from list
961 * rcu_read_lock()
962 * read mru_block
963 * mru_block = NULL;
964 * call_rcu(reclaim_ramblock, xxx);
965 * rcu_read_unlock()
967 * atomic_rcu_set is not needed here. The block was already published
968 * when it was placed into the list. Here we're just making an extra
969 * copy of the pointer.
971 ram_list.mru_block = block;
972 return block;
975 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
977 CPUState *cpu;
978 ram_addr_t start1;
979 RAMBlock *block;
980 ram_addr_t end;
982 end = TARGET_PAGE_ALIGN(start + length);
983 start &= TARGET_PAGE_MASK;
985 rcu_read_lock();
986 block = qemu_get_ram_block(start);
987 assert(block == qemu_get_ram_block(end - 1));
988 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
989 CPU_FOREACH(cpu) {
990 tlb_reset_dirty(cpu, start1, length);
992 rcu_read_unlock();
995 /* Note: start and end must be within the same ram block. */
996 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
997 ram_addr_t length,
998 unsigned client)
1000 DirtyMemoryBlocks *blocks;
1001 unsigned long end, page;
1002 bool dirty = false;
1004 if (length == 0) {
1005 return false;
1008 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1009 page = start >> TARGET_PAGE_BITS;
1011 rcu_read_lock();
1013 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1015 while (page < end) {
1016 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1017 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1018 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1020 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1021 offset, num);
1022 page += num;
1025 rcu_read_unlock();
1027 if (dirty && tcg_enabled()) {
1028 tlb_reset_dirty_range_all(start, length);
1031 return dirty;
1034 /* Called from RCU critical section */
1035 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1036 MemoryRegionSection *section,
1037 target_ulong vaddr,
1038 hwaddr paddr, hwaddr xlat,
1039 int prot,
1040 target_ulong *address)
1042 hwaddr iotlb;
1043 CPUWatchpoint *wp;
1045 if (memory_region_is_ram(section->mr)) {
1046 /* Normal RAM. */
1047 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1048 if (!section->readonly) {
1049 iotlb |= PHYS_SECTION_NOTDIRTY;
1050 } else {
1051 iotlb |= PHYS_SECTION_ROM;
1053 } else {
1054 AddressSpaceDispatch *d;
1056 d = atomic_rcu_read(&section->address_space->dispatch);
1057 iotlb = section - d->map.sections;
1058 iotlb += xlat;
1061 /* Make accesses to pages with watchpoints go via the
1062 watchpoint trap routines. */
1063 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1064 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1065 /* Avoid trapping reads of pages with a write breakpoint. */
1066 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1067 iotlb = PHYS_SECTION_WATCH + paddr;
1068 *address |= TLB_MMIO;
1069 break;
1074 return iotlb;
1076 #endif /* !defined(CONFIG_USER_ONLY) */
1078 #if !defined(CONFIG_USER_ONLY)
1080 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1081 uint16_t section);
1082 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1084 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1085 qemu_anon_ram_alloc;
1088 * Set a custom physical guest memory alloator.
1089 * Accelerators with unusual needs may need this. Hopefully, we can
1090 * get rid of it eventually.
1092 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1094 phys_mem_alloc = alloc;
1097 static uint16_t phys_section_add(PhysPageMap *map,
1098 MemoryRegionSection *section)
1100 /* The physical section number is ORed with a page-aligned
1101 * pointer to produce the iotlb entries. Thus it should
1102 * never overflow into the page-aligned value.
1104 assert(map->sections_nb < TARGET_PAGE_SIZE);
1106 if (map->sections_nb == map->sections_nb_alloc) {
1107 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1108 map->sections = g_renew(MemoryRegionSection, map->sections,
1109 map->sections_nb_alloc);
1111 map->sections[map->sections_nb] = *section;
1112 memory_region_ref(section->mr);
1113 return map->sections_nb++;
1116 static void phys_section_destroy(MemoryRegion *mr)
1118 bool have_sub_page = mr->subpage;
1120 memory_region_unref(mr);
1122 if (have_sub_page) {
1123 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1124 object_unref(OBJECT(&subpage->iomem));
1125 g_free(subpage);
1129 static void phys_sections_free(PhysPageMap *map)
1131 while (map->sections_nb > 0) {
1132 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1133 phys_section_destroy(section->mr);
1135 g_free(map->sections);
1136 g_free(map->nodes);
1139 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1141 subpage_t *subpage;
1142 hwaddr base = section->offset_within_address_space
1143 & TARGET_PAGE_MASK;
1144 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1145 d->map.nodes, d->map.sections);
1146 MemoryRegionSection subsection = {
1147 .offset_within_address_space = base,
1148 .size = int128_make64(TARGET_PAGE_SIZE),
1150 hwaddr start, end;
1152 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1154 if (!(existing->mr->subpage)) {
1155 subpage = subpage_init(d->as, base);
1156 subsection.address_space = d->as;
1157 subsection.mr = &subpage->iomem;
1158 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1159 phys_section_add(&d->map, &subsection));
1160 } else {
1161 subpage = container_of(existing->mr, subpage_t, iomem);
1163 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1164 end = start + int128_get64(section->size) - 1;
1165 subpage_register(subpage, start, end,
1166 phys_section_add(&d->map, section));
1170 static void register_multipage(AddressSpaceDispatch *d,
1171 MemoryRegionSection *section)
1173 hwaddr start_addr = section->offset_within_address_space;
1174 uint16_t section_index = phys_section_add(&d->map, section);
1175 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1176 TARGET_PAGE_BITS));
1178 assert(num_pages);
1179 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1182 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1184 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1185 AddressSpaceDispatch *d = as->next_dispatch;
1186 MemoryRegionSection now = *section, remain = *section;
1187 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1189 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1190 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1191 - now.offset_within_address_space;
1193 now.size = int128_min(int128_make64(left), now.size);
1194 register_subpage(d, &now);
1195 } else {
1196 now.size = int128_zero();
1198 while (int128_ne(remain.size, now.size)) {
1199 remain.size = int128_sub(remain.size, now.size);
1200 remain.offset_within_address_space += int128_get64(now.size);
1201 remain.offset_within_region += int128_get64(now.size);
1202 now = remain;
1203 if (int128_lt(remain.size, page_size)) {
1204 register_subpage(d, &now);
1205 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1206 now.size = page_size;
1207 register_subpage(d, &now);
1208 } else {
1209 now.size = int128_and(now.size, int128_neg(page_size));
1210 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1221 void qemu_mutex_lock_ramlist(void)
1223 qemu_mutex_lock(&ram_list.mutex);
1226 void qemu_mutex_unlock_ramlist(void)
1228 qemu_mutex_unlock(&ram_list.mutex);
1231 #ifdef __linux__
/*
 * Return the size of the file behind @fd, found by seeking to its end,
 * or a negative errno value if the seek fails.  The file offset of @fd
 * is left at the end of the file.
 */
static int64_t get_file_size(int fd)
{
    int64_t end = lseek(fd, 0, SEEK_END);

    return end < 0 ? -errno : end;
}
1241 static void *file_ram_alloc(RAMBlock *block,
1242 ram_addr_t memory,
1243 const char *path,
1244 Error **errp)
1246 bool unlink_on_error = false;
1247 char *filename;
1248 char *sanitized_name;
1249 char *c;
1250 void *area = MAP_FAILED;
1251 int fd = -1;
1252 int64_t file_size;
1254 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1255 error_setg(errp,
1256 "host lacks kvm mmu notifiers, -mem-path unsupported");
1257 return NULL;
1260 for (;;) {
1261 fd = open(path, O_RDWR);
1262 if (fd >= 0) {
1263 /* @path names an existing file, use it */
1264 break;
1266 if (errno == ENOENT) {
1267 /* @path names a file that doesn't exist, create it */
1268 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1269 if (fd >= 0) {
1270 unlink_on_error = true;
1271 break;
1273 } else if (errno == EISDIR) {
1274 /* @path names a directory, create a file there */
1275 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1276 sanitized_name = g_strdup(memory_region_name(block->mr));
1277 for (c = sanitized_name; *c != '\0'; c++) {
1278 if (*c == '/') {
1279 *c = '_';
1283 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1284 sanitized_name);
1285 g_free(sanitized_name);
1287 fd = mkstemp(filename);
1288 if (fd >= 0) {
1289 unlink(filename);
1290 g_free(filename);
1291 break;
1293 g_free(filename);
1295 if (errno != EEXIST && errno != EINTR) {
1296 error_setg_errno(errp, errno,
1297 "can't open backing store %s for guest RAM",
1298 path);
1299 goto error;
1302 * Try again on EINTR and EEXIST. The latter happens when
1303 * something else creates the file between our two open().
1307 block->page_size = qemu_fd_getpagesize(fd);
1308 block->mr->align = block->page_size;
1309 #if defined(__s390x__)
1310 if (kvm_enabled()) {
1311 block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1313 #endif
1315 file_size = get_file_size(fd);
1317 if (memory < block->page_size) {
1318 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1319 "or larger than page size 0x%zx",
1320 memory, block->page_size);
1321 goto error;
1324 if (file_size > 0 && file_size < memory) {
1325 error_setg(errp, "backing store %s size 0x%" PRIx64
1326 " does not match 'size' option 0x" RAM_ADDR_FMT,
1327 path, file_size, memory);
1328 goto error;
1331 memory = ROUND_UP(memory, block->page_size);
1334 * ftruncate is not supported by hugetlbfs in older
1335 * hosts, so don't bother bailing out on errors.
1336 * If anything goes wrong with it under other filesystems,
1337 * mmap will fail.
1339 * Do not truncate the non-empty backend file to avoid corrupting
1340 * the existing data in the file. Disabling shrinking is not
1341 * enough. For example, the current vNVDIMM implementation stores
1342 * the guest NVDIMM labels at the end of the backend file. If the
1343 * backend file is later extended, QEMU will not be able to find
1344 * those labels. Therefore, extending the non-empty backend file
1345 * is disabled as well.
1347 if (!file_size && ftruncate(fd, memory)) {
1348 perror("ftruncate");
1351 area = qemu_ram_mmap(fd, memory, block->mr->align,
1352 block->flags & RAM_SHARED);
1353 if (area == MAP_FAILED) {
1354 error_setg_errno(errp, errno,
1355 "unable to map backing store for guest RAM");
1356 goto error;
1359 if (mem_prealloc) {
1360 os_mem_prealloc(fd, area, memory, errp);
1361 if (errp && *errp) {
1362 goto error;
1366 block->fd = fd;
1367 return area;
1369 error:
1370 if (area != MAP_FAILED) {
1371 qemu_ram_munmap(area, memory);
1373 if (unlink_on_error) {
1374 unlink(path);
1376 if (fd != -1) {
1377 close(fd);
1379 return NULL;
1381 #endif
1383 /* Called with the ramlist lock held. */
1384 static ram_addr_t find_ram_offset(ram_addr_t size)
1386 RAMBlock *block, *next_block;
1387 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1389 assert(size != 0); /* it would hand out same offset multiple times */
1391 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1392 return 0;
1395 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1396 ram_addr_t end, next = RAM_ADDR_MAX;
1398 end = block->offset + block->max_length;
1400 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1401 if (next_block->offset >= end) {
1402 next = MIN(next, next_block->offset);
1405 if (next - end >= size && next - end < mingap) {
1406 offset = end;
1407 mingap = next - end;
1411 if (offset == RAM_ADDR_MAX) {
1412 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1413 (uint64_t)size);
1414 abort();
1417 return offset;
1420 ram_addr_t last_ram_offset(void)
1422 RAMBlock *block;
1423 ram_addr_t last = 0;
1425 rcu_read_lock();
1426 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1427 last = MAX(last, block->offset + block->max_length);
1429 rcu_read_unlock();
1430 return last;
1433 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1435 int ret;
1437 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1438 if (!machine_dump_guest_core(current_machine)) {
1439 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1440 if (ret) {
1441 perror("qemu_madvise");
1442 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1443 "but dump_guest_core=off specified\n");
1448 const char *qemu_ram_get_idstr(RAMBlock *rb)
1450 return rb->idstr;
1453 /* Called with iothread lock held. */
1454 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1456 RAMBlock *block;
1458 assert(new_block);
1459 assert(!new_block->idstr[0]);
1461 if (dev) {
1462 char *id = qdev_get_dev_path(dev);
1463 if (id) {
1464 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1465 g_free(id);
1468 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1470 rcu_read_lock();
1471 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1472 if (block != new_block &&
1473 !strcmp(block->idstr, new_block->idstr)) {
1474 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1475 new_block->idstr);
1476 abort();
1479 rcu_read_unlock();
1482 /* Called with iothread lock held. */
1483 void qemu_ram_unset_idstr(RAMBlock *block)
1485 /* FIXME: arch_init.c assumes that this is not called throughout
1486 * migration. Ignore the problem since hot-unplug during migration
1487 * does not work anyway.
1489 if (block) {
1490 memset(block->idstr, 0, sizeof(block->idstr));
1494 size_t qemu_ram_pagesize(RAMBlock *rb)
1496 return rb->page_size;
1499 static int memory_try_enable_merging(void *addr, size_t len)
1501 if (!machine_mem_merge(current_machine)) {
1502 /* disabled by the user */
1503 return 0;
1506 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1509 /* Only legal before guest might have detected the memory size: e.g. on
1510 * incoming migration, or right after reset.
1512 * As memory core doesn't know how is memory accessed, it is up to
1513 * resize callback to update device state and/or add assertions to detect
1514 * misuse, if necessary.
1516 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1518 assert(block);
1520 newsize = HOST_PAGE_ALIGN(newsize);
1522 if (block->used_length == newsize) {
1523 return 0;
1526 if (!(block->flags & RAM_RESIZEABLE)) {
1527 error_setg_errno(errp, EINVAL,
1528 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1529 " in != 0x" RAM_ADDR_FMT, block->idstr,
1530 newsize, block->used_length);
1531 return -EINVAL;
1534 if (block->max_length < newsize) {
1535 error_setg_errno(errp, EINVAL,
1536 "Length too large: %s: 0x" RAM_ADDR_FMT
1537 " > 0x" RAM_ADDR_FMT, block->idstr,
1538 newsize, block->max_length);
1539 return -EINVAL;
1542 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1543 block->used_length = newsize;
1544 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1545 DIRTY_CLIENTS_ALL);
1546 memory_region_set_size(block->mr, newsize);
1547 if (block->resized) {
1548 block->resized(block->idstr, newsize, block->host);
1550 return 0;
1553 /* Called with ram_list.mutex held */
1554 static void dirty_memory_extend(ram_addr_t old_ram_size,
1555 ram_addr_t new_ram_size)
1557 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1558 DIRTY_MEMORY_BLOCK_SIZE);
1559 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1560 DIRTY_MEMORY_BLOCK_SIZE);
1561 int i;
1563 /* Only need to extend if block count increased */
1564 if (new_num_blocks <= old_num_blocks) {
1565 return;
1568 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1569 DirtyMemoryBlocks *old_blocks;
1570 DirtyMemoryBlocks *new_blocks;
1571 int j;
1573 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1574 new_blocks = g_malloc(sizeof(*new_blocks) +
1575 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1577 if (old_num_blocks) {
1578 memcpy(new_blocks->blocks, old_blocks->blocks,
1579 old_num_blocks * sizeof(old_blocks->blocks[0]));
1582 for (j = old_num_blocks; j < new_num_blocks; j++) {
1583 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1586 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1588 if (old_blocks) {
1589 g_free_rcu(old_blocks, rcu);
1594 static void ram_block_add(RAMBlock *new_block, Error **errp)
1596 RAMBlock *block;
1597 RAMBlock *last_block = NULL;
1598 ram_addr_t old_ram_size, new_ram_size;
1599 Error *err = NULL;
1601 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1603 qemu_mutex_lock_ramlist();
1604 new_block->offset = find_ram_offset(new_block->max_length);
1606 if (!new_block->host) {
1607 if (xen_enabled()) {
1608 xen_ram_alloc(new_block->offset, new_block->max_length,
1609 new_block->mr, &err);
1610 if (err) {
1611 error_propagate(errp, err);
1612 qemu_mutex_unlock_ramlist();
1613 return;
1615 } else {
1616 new_block->host = phys_mem_alloc(new_block->max_length,
1617 &new_block->mr->align);
1618 if (!new_block->host) {
1619 error_setg_errno(errp, errno,
1620 "cannot set up guest memory '%s'",
1621 memory_region_name(new_block->mr));
1622 qemu_mutex_unlock_ramlist();
1623 return;
1625 memory_try_enable_merging(new_block->host, new_block->max_length);
1629 new_ram_size = MAX(old_ram_size,
1630 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1631 if (new_ram_size > old_ram_size) {
1632 migration_bitmap_extend(old_ram_size, new_ram_size);
1633 dirty_memory_extend(old_ram_size, new_ram_size);
1635 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1636 * QLIST (which has an RCU-friendly variant) does not have insertion at
1637 * tail, so save the last element in last_block.
1639 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1640 last_block = block;
1641 if (block->max_length < new_block->max_length) {
1642 break;
1645 if (block) {
1646 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1647 } else if (last_block) {
1648 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1649 } else { /* list is empty */
1650 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1652 ram_list.mru_block = NULL;
1654 /* Write list before version */
1655 smp_wmb();
1656 ram_list.version++;
1657 qemu_mutex_unlock_ramlist();
1659 cpu_physical_memory_set_dirty_range(new_block->offset,
1660 new_block->used_length,
1661 DIRTY_CLIENTS_ALL);
1663 if (new_block->host) {
1664 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1665 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1666 /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1667 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1671 #ifdef __linux__
1672 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1673 bool share, const char *mem_path,
1674 Error **errp)
1676 RAMBlock *new_block;
1677 Error *local_err = NULL;
1679 if (xen_enabled()) {
1680 error_setg(errp, "-mem-path not supported with Xen");
1681 return NULL;
1684 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1686 * file_ram_alloc() needs to allocate just like
1687 * phys_mem_alloc, but we haven't bothered to provide
1688 * a hook there.
1690 error_setg(errp,
1691 "-mem-path not supported with this accelerator");
1692 return NULL;
1695 size = HOST_PAGE_ALIGN(size);
1696 new_block = g_malloc0(sizeof(*new_block));
1697 new_block->mr = mr;
1698 new_block->used_length = size;
1699 new_block->max_length = size;
1700 new_block->flags = share ? RAM_SHARED : 0;
1701 new_block->host = file_ram_alloc(new_block, size,
1702 mem_path, errp);
1703 if (!new_block->host) {
1704 g_free(new_block);
1705 return NULL;
1708 ram_block_add(new_block, &local_err);
1709 if (local_err) {
1710 g_free(new_block);
1711 error_propagate(errp, local_err);
1712 return NULL;
1714 return new_block;
1716 #endif
1718 static
1719 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1720 void (*resized)(const char*,
1721 uint64_t length,
1722 void *host),
1723 void *host, bool resizeable,
1724 MemoryRegion *mr, Error **errp)
1726 RAMBlock *new_block;
1727 Error *local_err = NULL;
1729 size = HOST_PAGE_ALIGN(size);
1730 max_size = HOST_PAGE_ALIGN(max_size);
1731 new_block = g_malloc0(sizeof(*new_block));
1732 new_block->mr = mr;
1733 new_block->resized = resized;
1734 new_block->used_length = size;
1735 new_block->max_length = max_size;
1736 assert(max_size >= size);
1737 new_block->fd = -1;
1738 new_block->page_size = getpagesize();
1739 new_block->host = host;
1740 if (host) {
1741 new_block->flags |= RAM_PREALLOC;
1743 if (resizeable) {
1744 new_block->flags |= RAM_RESIZEABLE;
1746 ram_block_add(new_block, &local_err);
1747 if (local_err) {
1748 g_free(new_block);
1749 error_propagate(errp, local_err);
1750 return NULL;
1752 return new_block;
1755 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1756 MemoryRegion *mr, Error **errp)
1758 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1761 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1763 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1766 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1767 void (*resized)(const char*,
1768 uint64_t length,
1769 void *host),
1770 MemoryRegion *mr, Error **errp)
1772 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1775 static void reclaim_ramblock(RAMBlock *block)
1777 if (block->flags & RAM_PREALLOC) {
1779 } else if (xen_enabled()) {
1780 xen_invalidate_map_cache_entry(block->host);
1781 #ifndef _WIN32
1782 } else if (block->fd >= 0) {
1783 qemu_ram_munmap(block->host, block->max_length);
1784 close(block->fd);
1785 #endif
1786 } else {
1787 qemu_anon_ram_free(block->host, block->max_length);
1789 g_free(block);
1792 void qemu_ram_free(RAMBlock *block)
1794 if (!block) {
1795 return;
1798 qemu_mutex_lock_ramlist();
1799 QLIST_REMOVE_RCU(block, next);
1800 ram_list.mru_block = NULL;
1801 /* Write list before version */
1802 smp_wmb();
1803 ram_list.version++;
1804 call_rcu(block, reclaim_ramblock, rcu);
1805 qemu_mutex_unlock_ramlist();
1808 #ifndef _WIN32
1809 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1811 RAMBlock *block;
1812 ram_addr_t offset;
1813 int flags;
1814 void *area, *vaddr;
1816 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1817 offset = addr - block->offset;
1818 if (offset < block->max_length) {
1819 vaddr = ramblock_ptr(block, offset);
1820 if (block->flags & RAM_PREALLOC) {
1822 } else if (xen_enabled()) {
1823 abort();
1824 } else {
1825 flags = MAP_FIXED;
1826 if (block->fd >= 0) {
1827 flags |= (block->flags & RAM_SHARED ?
1828 MAP_SHARED : MAP_PRIVATE);
1829 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1830 flags, block->fd, offset);
1831 } else {
1833 * Remap needs to match alloc. Accelerators that
1834 * set phys_mem_alloc never remap. If they did,
1835 * we'd need a remap hook here.
1837 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1839 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1840 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1841 flags, -1, 0);
1843 if (area != vaddr) {
1844 fprintf(stderr, "Could not remap addr: "
1845 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1846 length, addr);
1847 exit(1);
1849 memory_try_enable_merging(vaddr, length);
1850 qemu_ram_setup_dump(vaddr, length);
1855 #endif /* !_WIN32 */
1857 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1858 * This should not be used for general purpose DMA. Use address_space_map
1859 * or address_space_rw instead. For local memory (e.g. video ram) that the
1860 * device owns, use memory_region_get_ram_ptr.
1862 * Called within RCU critical section.
1864 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1866 RAMBlock *block = ram_block;
1868 if (block == NULL) {
1869 block = qemu_get_ram_block(addr);
1870 addr -= block->offset;
1873 if (xen_enabled() && block->host == NULL) {
1874 /* We need to check if the requested address is in the RAM
1875 * because we don't want to map the entire memory in QEMU.
1876 * In that case just map until the end of the page.
1878 if (block->offset == 0) {
1879 return xen_map_cache(addr, 0, 0);
1882 block->host = xen_map_cache(block->offset, block->max_length, 1);
1884 return ramblock_ptr(block, addr);
1887 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1888 * but takes a size argument.
1890 * Called within RCU critical section.
1892 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1893 hwaddr *size)
1895 RAMBlock *block = ram_block;
1896 if (*size == 0) {
1897 return NULL;
1900 if (block == NULL) {
1901 block = qemu_get_ram_block(addr);
1902 addr -= block->offset;
1904 *size = MIN(*size, block->max_length - addr);
1906 if (xen_enabled() && block->host == NULL) {
1907 /* We need to check if the requested address is in the RAM
1908 * because we don't want to map the entire memory in QEMU.
1909 * In that case just map the requested area.
1911 if (block->offset == 0) {
1912 return xen_map_cache(addr, *size, 1);
1915 block->host = xen_map_cache(block->offset, block->max_length, 1);
1918 return ramblock_ptr(block, addr);
1922 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1923 * in that RAMBlock.
1925 * ptr: Host pointer to look up
1926 * round_offset: If true round the result offset down to a page boundary
1927 * *ram_addr: set to result ram_addr
1928 * *offset: set to result offset within the RAMBlock
1930 * Returns: RAMBlock (or NULL if not found)
1932 * By the time this function returns, the returned pointer is not protected
1933 * by RCU anymore. If the caller is not within an RCU critical section and
1934 * does not hold the iothread lock, it must have other means of protecting the
1935 * pointer, such as a reference to the region that includes the incoming
1936 * ram_addr_t.
1938 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1939 ram_addr_t *offset)
1941 RAMBlock *block;
1942 uint8_t *host = ptr;
1944 if (xen_enabled()) {
1945 ram_addr_t ram_addr;
1946 rcu_read_lock();
1947 ram_addr = xen_ram_addr_from_mapcache(ptr);
1948 block = qemu_get_ram_block(ram_addr);
1949 if (block) {
1950 *offset = ram_addr - block->offset;
1952 rcu_read_unlock();
1953 return block;
1956 rcu_read_lock();
1957 block = atomic_rcu_read(&ram_list.mru_block);
1958 if (block && block->host && host - block->host < block->max_length) {
1959 goto found;
1962 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1963 /* This case append when the block is not mapped. */
1964 if (block->host == NULL) {
1965 continue;
1967 if (host - block->host < block->max_length) {
1968 goto found;
1972 rcu_read_unlock();
1973 return NULL;
1975 found:
1976 *offset = (host - block->host);
1977 if (round_offset) {
1978 *offset &= TARGET_PAGE_MASK;
1980 rcu_read_unlock();
1981 return block;
1985 * Finds the named RAMBlock
1987 * name: The name of RAMBlock to find
1989 * Returns: RAMBlock (or NULL if not found)
1991 RAMBlock *qemu_ram_block_by_name(const char *name)
1993 RAMBlock *block;
1995 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1996 if (!strcmp(name, block->idstr)) {
1997 return block;
2001 return NULL;
2004 /* Some of the softmmu routines need to translate from a host pointer
2005 (typically a TLB entry) back to a ram offset. */
2006 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2008 RAMBlock *block;
2009 ram_addr_t offset;
2011 block = qemu_ram_block_from_host(ptr, false, &offset);
2012 if (!block) {
2013 return RAM_ADDR_INVALID;
2016 return block->offset + offset;
2019 /* Called within RCU critical section. */
2020 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2021 uint64_t val, unsigned size)
2023 bool locked = false;
2025 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2026 locked = true;
2027 tb_lock();
2028 tb_invalidate_phys_page_fast(ram_addr, size);
2030 switch (size) {
2031 case 1:
2032 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2033 break;
2034 case 2:
2035 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2036 break;
2037 case 4:
2038 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2039 break;
2040 default:
2041 abort();
2044 if (locked) {
2045 tb_unlock();
2048 /* Set both VGA and migration bits for simplicity and to remove
2049 * the notdirty callback faster.
2051 cpu_physical_memory_set_dirty_range(ram_addr, size,
2052 DIRTY_CLIENTS_NOCODE);
2053 /* we remove the notdirty callback only if the code has been
2054 flushed */
2055 if (!cpu_physical_memory_is_clean(ram_addr)) {
2056 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2060 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2061 unsigned size, bool is_write)
2063 return is_write;
2066 static const MemoryRegionOps notdirty_mem_ops = {
2067 .write = notdirty_mem_write,
2068 .valid.accepts = notdirty_mem_accepts,
2069 .endianness = DEVICE_NATIVE_ENDIAN,
2072 /* Generate a debug exception if a watchpoint has been hit. */
2073 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2075 CPUState *cpu = current_cpu;
2076 CPUClass *cc = CPU_GET_CLASS(cpu);
2077 CPUArchState *env = cpu->env_ptr;
2078 target_ulong pc, cs_base;
2079 target_ulong vaddr;
2080 CPUWatchpoint *wp;
2081 uint32_t cpu_flags;
2083 if (cpu->watchpoint_hit) {
2084 /* We re-entered the check after replacing the TB. Now raise
2085 * the debug interrupt so that is will trigger after the
2086 * current instruction. */
2087 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2088 return;
2090 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2091 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2092 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2093 && (wp->flags & flags)) {
2094 if (flags == BP_MEM_READ) {
2095 wp->flags |= BP_WATCHPOINT_HIT_READ;
2096 } else {
2097 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2099 wp->hitaddr = vaddr;
2100 wp->hitattrs = attrs;
2101 if (!cpu->watchpoint_hit) {
2102 if (wp->flags & BP_CPU &&
2103 !cc->debug_check_watchpoint(cpu, wp)) {
2104 wp->flags &= ~BP_WATCHPOINT_HIT;
2105 continue;
2107 cpu->watchpoint_hit = wp;
2109 /* The tb_lock will be reset when cpu_loop_exit or
2110 * cpu_loop_exit_noexc longjmp back into the cpu_exec
2111 * main loop.
2113 tb_lock();
2114 tb_check_watchpoint(cpu);
2115 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2116 cpu->exception_index = EXCP_DEBUG;
2117 cpu_loop_exit(cpu);
2118 } else {
2119 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2120 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2121 cpu_loop_exit_noexc(cpu);
2124 } else {
2125 wp->flags &= ~BP_WATCHPOINT_HIT;
2130 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2131 so these check for a hit then pass through to the normal out-of-line
2132 phys routines. */
2133 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2134 unsigned size, MemTxAttrs attrs)
2136 MemTxResult res;
2137 uint64_t data;
2138 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2139 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2141 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2142 switch (size) {
2143 case 1:
2144 data = address_space_ldub(as, addr, attrs, &res);
2145 break;
2146 case 2:
2147 data = address_space_lduw(as, addr, attrs, &res);
2148 break;
2149 case 4:
2150 data = address_space_ldl(as, addr, attrs, &res);
2151 break;
2152 default: abort();
2154 *pdata = data;
2155 return res;
2158 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2159 uint64_t val, unsigned size,
2160 MemTxAttrs attrs)
2162 MemTxResult res;
2163 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2164 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2166 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2167 switch (size) {
2168 case 1:
2169 address_space_stb(as, addr, val, attrs, &res);
2170 break;
2171 case 2:
2172 address_space_stw(as, addr, val, attrs, &res);
2173 break;
2174 case 4:
2175 address_space_stl(as, addr, val, attrs, &res);
2176 break;
2177 default: abort();
2179 return res;
2182 static const MemoryRegionOps watch_mem_ops = {
2183 .read_with_attrs = watch_mem_read,
2184 .write_with_attrs = watch_mem_write,
2185 .endianness = DEVICE_NATIVE_ENDIAN,
2188 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2189 unsigned len, MemTxAttrs attrs)
2191 subpage_t *subpage = opaque;
2192 uint8_t buf[8];
2193 MemTxResult res;
2195 #if defined(DEBUG_SUBPAGE)
2196 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2197 subpage, len, addr);
2198 #endif
2199 res = address_space_read(subpage->as, addr + subpage->base,
2200 attrs, buf, len);
2201 if (res) {
2202 return res;
2204 switch (len) {
2205 case 1:
2206 *data = ldub_p(buf);
2207 return MEMTX_OK;
2208 case 2:
2209 *data = lduw_p(buf);
2210 return MEMTX_OK;
2211 case 4:
2212 *data = ldl_p(buf);
2213 return MEMTX_OK;
2214 case 8:
2215 *data = ldq_p(buf);
2216 return MEMTX_OK;
2217 default:
2218 abort();
2222 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2223 uint64_t value, unsigned len, MemTxAttrs attrs)
2225 subpage_t *subpage = opaque;
2226 uint8_t buf[8];
2228 #if defined(DEBUG_SUBPAGE)
2229 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2230 " value %"PRIx64"\n",
2231 __func__, subpage, len, addr, value);
2232 #endif
2233 switch (len) {
2234 case 1:
2235 stb_p(buf, value);
2236 break;
2237 case 2:
2238 stw_p(buf, value);
2239 break;
2240 case 4:
2241 stl_p(buf, value);
2242 break;
2243 case 8:
2244 stq_p(buf, value);
2245 break;
2246 default:
2247 abort();
2249 return address_space_write(subpage->as, addr + subpage->base,
2250 attrs, buf, len);
2253 static bool subpage_accepts(void *opaque, hwaddr addr,
2254 unsigned len, bool is_write)
2256 subpage_t *subpage = opaque;
2257 #if defined(DEBUG_SUBPAGE)
2258 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2259 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2260 #endif
2262 return address_space_access_valid(subpage->as, addr + subpage->base,
2263 len, is_write);
2266 static const MemoryRegionOps subpage_ops = {
2267 .read_with_attrs = subpage_read,
2268 .write_with_attrs = subpage_write,
2269 .impl.min_access_size = 1,
2270 .impl.max_access_size = 8,
2271 .valid.min_access_size = 1,
2272 .valid.max_access_size = 8,
2273 .valid.accepts = subpage_accepts,
2274 .endianness = DEVICE_NATIVE_ENDIAN,
2277 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2278 uint16_t section)
2280 int idx, eidx;
2282 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2283 return -1;
2284 idx = SUBPAGE_IDX(start);
2285 eidx = SUBPAGE_IDX(end);
2286 #if defined(DEBUG_SUBPAGE)
2287 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2288 __func__, mmio, start, end, idx, eidx, section);
2289 #endif
2290 for (; idx <= eidx; idx++) {
2291 mmio->sub_section[idx] = section;
2294 return 0;
2297 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2299 subpage_t *mmio;
2301 mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2302 mmio->as = as;
2303 mmio->base = base;
2304 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2305 NULL, TARGET_PAGE_SIZE);
2306 mmio->iomem.subpage = true;
2307 #if defined(DEBUG_SUBPAGE)
2308 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2309 mmio, base, TARGET_PAGE_SIZE);
2310 #endif
2311 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2313 return mmio;
2316 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2317 MemoryRegion *mr)
2319 assert(as);
2320 MemoryRegionSection section = {
2321 .address_space = as,
2322 .mr = mr,
2323 .offset_within_address_space = 0,
2324 .offset_within_region = 0,
2325 .size = int128_2_64(),
2328 return phys_section_add(map, &section);
2331 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2333 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2334 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2335 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2336 MemoryRegionSection *sections = d->map.sections;
2338 return sections[index & ~TARGET_PAGE_MASK].mr;
2341 static void io_mem_init(void)
2343 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2344 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2345 NULL, UINT64_MAX);
2346 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2347 NULL, UINT64_MAX);
2348 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2349 NULL, UINT64_MAX);
2352 static void mem_begin(MemoryListener *listener)
2354 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2355 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2356 uint16_t n;
2358 n = dummy_section(&d->map, as, &io_mem_unassigned);
2359 assert(n == PHYS_SECTION_UNASSIGNED);
2360 n = dummy_section(&d->map, as, &io_mem_notdirty);
2361 assert(n == PHYS_SECTION_NOTDIRTY);
2362 n = dummy_section(&d->map, as, &io_mem_rom);
2363 assert(n == PHYS_SECTION_ROM);
2364 n = dummy_section(&d->map, as, &io_mem_watch);
2365 assert(n == PHYS_SECTION_WATCH);
2367 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2368 d->as = as;
2369 as->next_dispatch = d;
2372 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2374 phys_sections_free(&d->map);
2375 g_free(d);
2378 static void mem_commit(MemoryListener *listener)
2380 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2381 AddressSpaceDispatch *cur = as->dispatch;
2382 AddressSpaceDispatch *next = as->next_dispatch;
2384 phys_page_compact_all(next, next->map.nodes_nb);
2386 atomic_rcu_set(&as->dispatch, next);
2387 if (cur) {
2388 call_rcu(cur, address_space_dispatch_free, rcu);
2392 static void tcg_commit(MemoryListener *listener)
2394 CPUAddressSpace *cpuas;
2395 AddressSpaceDispatch *d;
2397 /* since each CPU stores ram addresses in its TLB cache, we must
2398 reset the modified entries */
2399 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2400 cpu_reloading_memory_map();
2401 /* The CPU and TLB are protected by the iothread lock.
2402 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2403 * may have split the RCU critical section.
2405 d = atomic_rcu_read(&cpuas->as->dispatch);
2406 atomic_rcu_set(&cpuas->memory_dispatch, d);
2407 tlb_flush(cpuas->cpu, 1);
2410 void address_space_init_dispatch(AddressSpace *as)
2412 as->dispatch = NULL;
2413 as->dispatch_listener = (MemoryListener) {
2414 .begin = mem_begin,
2415 .commit = mem_commit,
2416 .region_add = mem_add,
2417 .region_nop = mem_add,
2418 .priority = 0,
2420 memory_listener_register(&as->dispatch_listener, as);
2423 void address_space_unregister(AddressSpace *as)
2425 memory_listener_unregister(&as->dispatch_listener);
2428 void address_space_destroy_dispatch(AddressSpace *as)
2430 AddressSpaceDispatch *d = as->dispatch;
2432 atomic_rcu_set(&as->dispatch, NULL);
2433 if (d) {
2434 call_rcu(d, address_space_dispatch_free, rcu);
2438 static void memory_map_init(void)
2440 system_memory = g_malloc(sizeof(*system_memory));
2442 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2443 address_space_init(&address_space_memory, system_memory, "memory");
2445 system_io = g_malloc(sizeof(*system_io));
2446 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2447 65536);
2448 address_space_init(&address_space_io, system_io, "I/O");
2451 MemoryRegion *get_system_memory(void)
2453 return system_memory;
2456 MemoryRegion *get_system_io(void)
2458 return system_io;
2461 #endif /* !defined(CONFIG_USER_ONLY) */
2463 /* physical memory access (slow version, mainly for debug) */
2464 #if defined(CONFIG_USER_ONLY)
2465 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2466 uint8_t *buf, int len, int is_write)
2468 int l, flags;
2469 target_ulong page;
2470 void * p;
2472 while (len > 0) {
2473 page = addr & TARGET_PAGE_MASK;
2474 l = (page + TARGET_PAGE_SIZE) - addr;
2475 if (l > len)
2476 l = len;
2477 flags = page_get_flags(page);
2478 if (!(flags & PAGE_VALID))
2479 return -1;
2480 if (is_write) {
2481 if (!(flags & PAGE_WRITE))
2482 return -1;
2483 /* XXX: this code should not depend on lock_user */
2484 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2485 return -1;
2486 memcpy(p, buf, l);
2487 unlock_user(p, addr, l);
2488 } else {
2489 if (!(flags & PAGE_READ))
2490 return -1;
2491 /* XXX: this code should not depend on lock_user */
2492 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2493 return -1;
2494 memcpy(buf, p, l);
2495 unlock_user(p, addr, 0);
2497 len -= l;
2498 buf += l;
2499 addr += l;
2501 return 0;
2504 #else
2506 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2507 hwaddr length)
2509 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2510 addr += memory_region_get_ram_addr(mr);
2512 /* No early return if dirty_log_mask is or becomes 0, because
2513 * cpu_physical_memory_set_dirty_range will still call
2514 * xen_modified_memory.
2516 if (dirty_log_mask) {
2517 dirty_log_mask =
2518 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2520 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2521 tb_lock();
2522 tb_invalidate_phys_range(addr, addr + length);
2523 tb_unlock();
2524 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2526 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2529 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2531 unsigned access_size_max = mr->ops->valid.max_access_size;
2533 /* Regions are assumed to support 1-4 byte accesses unless
2534 otherwise specified. */
2535 if (access_size_max == 0) {
2536 access_size_max = 4;
2539 /* Bound the maximum access by the alignment of the address. */
2540 if (!mr->ops->impl.unaligned) {
2541 unsigned align_size_max = addr & -addr;
2542 if (align_size_max != 0 && align_size_max < access_size_max) {
2543 access_size_max = align_size_max;
2547 /* Don't attempt accesses larger than the maximum. */
2548 if (l > access_size_max) {
2549 l = access_size_max;
2551 l = pow2floor(l);
2553 return l;
2556 static bool prepare_mmio_access(MemoryRegion *mr)
2558 bool unlocked = !qemu_mutex_iothread_locked();
2559 bool release_lock = false;
2561 if (unlocked && mr->global_locking) {
2562 qemu_mutex_lock_iothread();
2563 unlocked = false;
2564 release_lock = true;
2566 if (mr->flush_coalesced_mmio) {
2567 if (unlocked) {
2568 qemu_mutex_lock_iothread();
2570 qemu_flush_coalesced_mmio_buffer();
2571 if (unlocked) {
2572 qemu_mutex_unlock_iothread();
2576 return release_lock;
2579 /* Called within RCU critical section. */
2580 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2581 MemTxAttrs attrs,
2582 const uint8_t *buf,
2583 int len, hwaddr addr1,
2584 hwaddr l, MemoryRegion *mr)
2586 uint8_t *ptr;
2587 uint64_t val;
2588 MemTxResult result = MEMTX_OK;
2589 bool release_lock = false;
2591 for (;;) {
2592 if (!memory_access_is_direct(mr, true)) {
2593 release_lock |= prepare_mmio_access(mr);
2594 l = memory_access_size(mr, l, addr1);
2595 /* XXX: could force current_cpu to NULL to avoid
2596 potential bugs */
2597 switch (l) {
2598 case 8:
2599 /* 64 bit write access */
2600 val = ldq_p(buf);
2601 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2602 attrs);
2603 break;
2604 case 4:
2605 /* 32 bit write access */
2606 val = ldl_p(buf);
2607 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2608 attrs);
2609 break;
2610 case 2:
2611 /* 16 bit write access */
2612 val = lduw_p(buf);
2613 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2614 attrs);
2615 break;
2616 case 1:
2617 /* 8 bit write access */
2618 val = ldub_p(buf);
2619 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2620 attrs);
2621 break;
2622 default:
2623 abort();
2625 } else {
2626 /* RAM case */
2627 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2628 memcpy(ptr, buf, l);
2629 invalidate_and_set_dirty(mr, addr1, l);
2632 if (release_lock) {
2633 qemu_mutex_unlock_iothread();
2634 release_lock = false;
2637 len -= l;
2638 buf += l;
2639 addr += l;
2641 if (!len) {
2642 break;
2645 l = len;
2646 mr = address_space_translate(as, addr, &addr1, &l, true);
2649 return result;
2652 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2653 const uint8_t *buf, int len)
2655 hwaddr l;
2656 hwaddr addr1;
2657 MemoryRegion *mr;
2658 MemTxResult result = MEMTX_OK;
2660 if (len > 0) {
2661 rcu_read_lock();
2662 l = len;
2663 mr = address_space_translate(as, addr, &addr1, &l, true);
2664 result = address_space_write_continue(as, addr, attrs, buf, len,
2665 addr1, l, mr);
2666 rcu_read_unlock();
2669 return result;
2672 /* Called within RCU critical section. */
2673 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2674 MemTxAttrs attrs, uint8_t *buf,
2675 int len, hwaddr addr1, hwaddr l,
2676 MemoryRegion *mr)
2678 uint8_t *ptr;
2679 uint64_t val;
2680 MemTxResult result = MEMTX_OK;
2681 bool release_lock = false;
2683 for (;;) {
2684 if (!memory_access_is_direct(mr, false)) {
2685 /* I/O case */
2686 release_lock |= prepare_mmio_access(mr);
2687 l = memory_access_size(mr, l, addr1);
2688 switch (l) {
2689 case 8:
2690 /* 64 bit read access */
2691 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2692 attrs);
2693 stq_p(buf, val);
2694 break;
2695 case 4:
2696 /* 32 bit read access */
2697 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2698 attrs);
2699 stl_p(buf, val);
2700 break;
2701 case 2:
2702 /* 16 bit read access */
2703 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2704 attrs);
2705 stw_p(buf, val);
2706 break;
2707 case 1:
2708 /* 8 bit read access */
2709 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2710 attrs);
2711 stb_p(buf, val);
2712 break;
2713 default:
2714 abort();
2716 } else {
2717 /* RAM case */
2718 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2719 memcpy(buf, ptr, l);
2722 if (release_lock) {
2723 qemu_mutex_unlock_iothread();
2724 release_lock = false;
2727 len -= l;
2728 buf += l;
2729 addr += l;
2731 if (!len) {
2732 break;
2735 l = len;
2736 mr = address_space_translate(as, addr, &addr1, &l, false);
2739 return result;
2742 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2743 MemTxAttrs attrs, uint8_t *buf, int len)
2745 hwaddr l;
2746 hwaddr addr1;
2747 MemoryRegion *mr;
2748 MemTxResult result = MEMTX_OK;
2750 if (len > 0) {
2751 rcu_read_lock();
2752 l = len;
2753 mr = address_space_translate(as, addr, &addr1, &l, false);
2754 result = address_space_read_continue(as, addr, attrs, buf, len,
2755 addr1, l, mr);
2756 rcu_read_unlock();
2759 return result;
2762 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2763 uint8_t *buf, int len, bool is_write)
2765 if (is_write) {
2766 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2767 } else {
2768 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2772 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2773 int len, int is_write)
2775 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2776 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the buffer into the region and mark it dirty */
    FLUSH_CACHE = 1,    /* flush the host instruction cache for the range */
};
2784 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2785 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2787 hwaddr l;
2788 uint8_t *ptr;
2789 hwaddr addr1;
2790 MemoryRegion *mr;
2792 rcu_read_lock();
2793 while (len > 0) {
2794 l = len;
2795 mr = address_space_translate(as, addr, &addr1, &l, true);
2797 if (!(memory_region_is_ram(mr) ||
2798 memory_region_is_romd(mr))) {
2799 l = memory_access_size(mr, l, addr1);
2800 } else {
2801 /* ROM/RAM case */
2802 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2803 switch (type) {
2804 case WRITE_DATA:
2805 memcpy(ptr, buf, l);
2806 invalidate_and_set_dirty(mr, addr1, l);
2807 break;
2808 case FLUSH_CACHE:
2809 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2810 break;
2813 len -= l;
2814 buf += l;
2815 addr += l;
2817 rcu_read_unlock();
2820 /* used for ROM loading : can write in RAM and ROM */
2821 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2822 const uint8_t *buf, int len)
2824 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2827 void cpu_flush_icache_range(hwaddr start, int len)
2830 * This function should do the same thing as an icache flush that was
2831 * triggered from within the guest. For TCG we are always cache coherent,
2832 * so there is no need to flush anything. For KVM / Xen we need to flush
2833 * the host's instruction cache at least.
2835 if (tcg_enabled()) {
2836 return;
2839 cpu_physical_memory_write_rom_internal(&address_space_memory,
2840 start, NULL, len, FLUSH_CACHE);
2843 typedef struct {
2844 MemoryRegion *mr;
2845 void *buffer;
2846 hwaddr addr;
2847 hwaddr len;
2848 bool in_use;
2849 } BounceBuffer;
2851 static BounceBuffer bounce;
2853 typedef struct MapClient {
2854 QEMUBH *bh;
2855 QLIST_ENTRY(MapClient) link;
2856 } MapClient;
2858 QemuMutex map_client_list_lock;
2859 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2860 = QLIST_HEAD_INITIALIZER(map_client_list);
2862 static void cpu_unregister_map_client_do(MapClient *client)
2864 QLIST_REMOVE(client, link);
2865 g_free(client);
2868 static void cpu_notify_map_clients_locked(void)
2870 MapClient *client;
2872 while (!QLIST_EMPTY(&map_client_list)) {
2873 client = QLIST_FIRST(&map_client_list);
2874 qemu_bh_schedule(client->bh);
2875 cpu_unregister_map_client_do(client);
2879 void cpu_register_map_client(QEMUBH *bh)
2881 MapClient *client = g_malloc(sizeof(*client));
2883 qemu_mutex_lock(&map_client_list_lock);
2884 client->bh = bh;
2885 QLIST_INSERT_HEAD(&map_client_list, client, link);
2886 if (!atomic_read(&bounce.in_use)) {
2887 cpu_notify_map_clients_locked();
2889 qemu_mutex_unlock(&map_client_list_lock);
2892 void cpu_exec_init_all(void)
2894 qemu_mutex_init(&ram_list.mutex);
2895 /* The data structures we set up here depend on knowing the page size,
2896 * so no more changes can be made after this point.
2897 * In an ideal world, nothing we did before we had finished the
2898 * machine setup would care about the target page size, and we could
2899 * do this much later, rather than requiring board models to state
2900 * up front what their requirements are.
2902 finalize_target_page_bits();
2903 io_mem_init();
2904 memory_map_init();
2905 qemu_mutex_init(&map_client_list_lock);
2908 void cpu_unregister_map_client(QEMUBH *bh)
2910 MapClient *client;
2912 qemu_mutex_lock(&map_client_list_lock);
2913 QLIST_FOREACH(client, &map_client_list, link) {
2914 if (client->bh == bh) {
2915 cpu_unregister_map_client_do(client);
2916 break;
2919 qemu_mutex_unlock(&map_client_list_lock);
2922 static void cpu_notify_map_clients(void)
2924 qemu_mutex_lock(&map_client_list_lock);
2925 cpu_notify_map_clients_locked();
2926 qemu_mutex_unlock(&map_client_list_lock);
2929 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2931 MemoryRegion *mr;
2932 hwaddr l, xlat;
2934 rcu_read_lock();
2935 while (len > 0) {
2936 l = len;
2937 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2938 if (!memory_access_is_direct(mr, is_write)) {
2939 l = memory_access_size(mr, l, addr);
2940 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2941 return false;
2945 len -= l;
2946 addr += l;
2948 rcu_read_unlock();
2949 return true;
2952 /* Map a physical memory region into a host virtual address.
2953 * May map a subset of the requested range, given by and returned in *plen.
2954 * May return NULL if resources needed to perform the mapping are exhausted.
2955 * Use only for reads OR writes - not for read-modify-write operations.
2956 * Use cpu_register_map_client() to know when retrying the map operation is
2957 * likely to succeed.
2959 void *address_space_map(AddressSpace *as,
2960 hwaddr addr,
2961 hwaddr *plen,
2962 bool is_write)
2964 hwaddr len = *plen;
2965 hwaddr done = 0;
2966 hwaddr l, xlat, base;
2967 MemoryRegion *mr, *this_mr;
2968 void *ptr;
2970 if (len == 0) {
2971 return NULL;
2974 l = len;
2975 rcu_read_lock();
2976 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2978 if (!memory_access_is_direct(mr, is_write)) {
2979 if (atomic_xchg(&bounce.in_use, true)) {
2980 rcu_read_unlock();
2981 return NULL;
2983 /* Avoid unbounded allocations */
2984 l = MIN(l, TARGET_PAGE_SIZE);
2985 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2986 bounce.addr = addr;
2987 bounce.len = l;
2989 memory_region_ref(mr);
2990 bounce.mr = mr;
2991 if (!is_write) {
2992 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2993 bounce.buffer, l);
2996 rcu_read_unlock();
2997 *plen = l;
2998 return bounce.buffer;
3001 base = xlat;
3003 for (;;) {
3004 len -= l;
3005 addr += l;
3006 done += l;
3007 if (len == 0) {
3008 break;
3011 l = len;
3012 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3013 if (this_mr != mr || xlat != base + done) {
3014 break;
3018 memory_region_ref(mr);
3019 *plen = done;
3020 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
3021 rcu_read_unlock();
3023 return ptr;
3026 /* Unmaps a memory region previously mapped by address_space_map().
3027 * Will also mark the memory as dirty if is_write == 1. access_len gives
3028 * the amount of memory that was actually read or written by the caller.
3030 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3031 int is_write, hwaddr access_len)
3033 if (buffer != bounce.buffer) {
3034 MemoryRegion *mr;
3035 ram_addr_t addr1;
3037 mr = memory_region_from_host(buffer, &addr1);
3038 assert(mr != NULL);
3039 if (is_write) {
3040 invalidate_and_set_dirty(mr, addr1, access_len);
3042 if (xen_enabled()) {
3043 xen_invalidate_map_cache_entry(buffer);
3045 memory_region_unref(mr);
3046 return;
3048 if (is_write) {
3049 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3050 bounce.buffer, access_len);
3052 qemu_vfree(bounce.buffer);
3053 bounce.buffer = NULL;
3054 memory_region_unref(bounce.mr);
3055 atomic_mb_set(&bounce.in_use, false);
3056 cpu_notify_map_clients();
3059 void *cpu_physical_memory_map(hwaddr addr,
3060 hwaddr *plen,
3061 int is_write)
3063 return address_space_map(&address_space_memory, addr, plen, is_write);
3066 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3067 int is_write, hwaddr access_len)
3069 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3072 /* warning: addr must be aligned */
3073 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3074 MemTxAttrs attrs,
3075 MemTxResult *result,
3076 enum device_endian endian)
3078 uint8_t *ptr;
3079 uint64_t val;
3080 MemoryRegion *mr;
3081 hwaddr l = 4;
3082 hwaddr addr1;
3083 MemTxResult r;
3084 bool release_lock = false;
3086 rcu_read_lock();
3087 mr = address_space_translate(as, addr, &addr1, &l, false);
3088 if (l < 4 || !memory_access_is_direct(mr, false)) {
3089 release_lock |= prepare_mmio_access(mr);
3091 /* I/O case */
3092 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3093 #if defined(TARGET_WORDS_BIGENDIAN)
3094 if (endian == DEVICE_LITTLE_ENDIAN) {
3095 val = bswap32(val);
3097 #else
3098 if (endian == DEVICE_BIG_ENDIAN) {
3099 val = bswap32(val);
3101 #endif
3102 } else {
3103 /* RAM case */
3104 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3105 switch (endian) {
3106 case DEVICE_LITTLE_ENDIAN:
3107 val = ldl_le_p(ptr);
3108 break;
3109 case DEVICE_BIG_ENDIAN:
3110 val = ldl_be_p(ptr);
3111 break;
3112 default:
3113 val = ldl_p(ptr);
3114 break;
3116 r = MEMTX_OK;
3118 if (result) {
3119 *result = r;
3121 if (release_lock) {
3122 qemu_mutex_unlock_iothread();
3124 rcu_read_unlock();
3125 return val;
3128 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3129 MemTxAttrs attrs, MemTxResult *result)
3131 return address_space_ldl_internal(as, addr, attrs, result,
3132 DEVICE_NATIVE_ENDIAN);
3135 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3136 MemTxAttrs attrs, MemTxResult *result)
3138 return address_space_ldl_internal(as, addr, attrs, result,
3139 DEVICE_LITTLE_ENDIAN);
3142 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3143 MemTxAttrs attrs, MemTxResult *result)
3145 return address_space_ldl_internal(as, addr, attrs, result,
3146 DEVICE_BIG_ENDIAN);
3149 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3151 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3154 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3156 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3159 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3161 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3164 /* warning: addr must be aligned */
3165 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3166 MemTxAttrs attrs,
3167 MemTxResult *result,
3168 enum device_endian endian)
3170 uint8_t *ptr;
3171 uint64_t val;
3172 MemoryRegion *mr;
3173 hwaddr l = 8;
3174 hwaddr addr1;
3175 MemTxResult r;
3176 bool release_lock = false;
3178 rcu_read_lock();
3179 mr = address_space_translate(as, addr, &addr1, &l,
3180 false);
3181 if (l < 8 || !memory_access_is_direct(mr, false)) {
3182 release_lock |= prepare_mmio_access(mr);
3184 /* I/O case */
3185 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3186 #if defined(TARGET_WORDS_BIGENDIAN)
3187 if (endian == DEVICE_LITTLE_ENDIAN) {
3188 val = bswap64(val);
3190 #else
3191 if (endian == DEVICE_BIG_ENDIAN) {
3192 val = bswap64(val);
3194 #endif
3195 } else {
3196 /* RAM case */
3197 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3198 switch (endian) {
3199 case DEVICE_LITTLE_ENDIAN:
3200 val = ldq_le_p(ptr);
3201 break;
3202 case DEVICE_BIG_ENDIAN:
3203 val = ldq_be_p(ptr);
3204 break;
3205 default:
3206 val = ldq_p(ptr);
3207 break;
3209 r = MEMTX_OK;
3211 if (result) {
3212 *result = r;
3214 if (release_lock) {
3215 qemu_mutex_unlock_iothread();
3217 rcu_read_unlock();
3218 return val;
3221 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3222 MemTxAttrs attrs, MemTxResult *result)
3224 return address_space_ldq_internal(as, addr, attrs, result,
3225 DEVICE_NATIVE_ENDIAN);
3228 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3229 MemTxAttrs attrs, MemTxResult *result)
3231 return address_space_ldq_internal(as, addr, attrs, result,
3232 DEVICE_LITTLE_ENDIAN);
3235 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3236 MemTxAttrs attrs, MemTxResult *result)
3238 return address_space_ldq_internal(as, addr, attrs, result,
3239 DEVICE_BIG_ENDIAN);
3242 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3244 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3247 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3249 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3252 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3254 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3257 /* XXX: optimize */
3258 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3259 MemTxAttrs attrs, MemTxResult *result)
3261 uint8_t val;
3262 MemTxResult r;
3264 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3265 if (result) {
3266 *result = r;
3268 return val;
3271 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3273 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3276 /* warning: addr must be aligned */
3277 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3278 hwaddr addr,
3279 MemTxAttrs attrs,
3280 MemTxResult *result,
3281 enum device_endian endian)
3283 uint8_t *ptr;
3284 uint64_t val;
3285 MemoryRegion *mr;
3286 hwaddr l = 2;
3287 hwaddr addr1;
3288 MemTxResult r;
3289 bool release_lock = false;
3291 rcu_read_lock();
3292 mr = address_space_translate(as, addr, &addr1, &l,
3293 false);
3294 if (l < 2 || !memory_access_is_direct(mr, false)) {
3295 release_lock |= prepare_mmio_access(mr);
3297 /* I/O case */
3298 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3299 #if defined(TARGET_WORDS_BIGENDIAN)
3300 if (endian == DEVICE_LITTLE_ENDIAN) {
3301 val = bswap16(val);
3303 #else
3304 if (endian == DEVICE_BIG_ENDIAN) {
3305 val = bswap16(val);
3307 #endif
3308 } else {
3309 /* RAM case */
3310 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3311 switch (endian) {
3312 case DEVICE_LITTLE_ENDIAN:
3313 val = lduw_le_p(ptr);
3314 break;
3315 case DEVICE_BIG_ENDIAN:
3316 val = lduw_be_p(ptr);
3317 break;
3318 default:
3319 val = lduw_p(ptr);
3320 break;
3322 r = MEMTX_OK;
3324 if (result) {
3325 *result = r;
3327 if (release_lock) {
3328 qemu_mutex_unlock_iothread();
3330 rcu_read_unlock();
3331 return val;
3334 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3335 MemTxAttrs attrs, MemTxResult *result)
3337 return address_space_lduw_internal(as, addr, attrs, result,
3338 DEVICE_NATIVE_ENDIAN);
3341 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3342 MemTxAttrs attrs, MemTxResult *result)
3344 return address_space_lduw_internal(as, addr, attrs, result,
3345 DEVICE_LITTLE_ENDIAN);
3348 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3349 MemTxAttrs attrs, MemTxResult *result)
3351 return address_space_lduw_internal(as, addr, attrs, result,
3352 DEVICE_BIG_ENDIAN);
3355 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3357 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3360 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3362 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3365 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3367 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3370 /* warning: addr must be aligned. The ram page is not masked as dirty
3371 and the code inside is not invalidated. It is useful if the dirty
3372 bits are used to track modified PTEs */
3373 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3374 MemTxAttrs attrs, MemTxResult *result)
3376 uint8_t *ptr;
3377 MemoryRegion *mr;
3378 hwaddr l = 4;
3379 hwaddr addr1;
3380 MemTxResult r;
3381 uint8_t dirty_log_mask;
3382 bool release_lock = false;
3384 rcu_read_lock();
3385 mr = address_space_translate(as, addr, &addr1, &l,
3386 true);
3387 if (l < 4 || !memory_access_is_direct(mr, true)) {
3388 release_lock |= prepare_mmio_access(mr);
3390 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3391 } else {
3392 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3393 stl_p(ptr, val);
3395 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3396 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3397 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3398 4, dirty_log_mask);
3399 r = MEMTX_OK;
3401 if (result) {
3402 *result = r;
3404 if (release_lock) {
3405 qemu_mutex_unlock_iothread();
3407 rcu_read_unlock();
3410 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3412 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3415 /* warning: addr must be aligned */
3416 static inline void address_space_stl_internal(AddressSpace *as,
3417 hwaddr addr, uint32_t val,
3418 MemTxAttrs attrs,
3419 MemTxResult *result,
3420 enum device_endian endian)
3422 uint8_t *ptr;
3423 MemoryRegion *mr;
3424 hwaddr l = 4;
3425 hwaddr addr1;
3426 MemTxResult r;
3427 bool release_lock = false;
3429 rcu_read_lock();
3430 mr = address_space_translate(as, addr, &addr1, &l,
3431 true);
3432 if (l < 4 || !memory_access_is_direct(mr, true)) {
3433 release_lock |= prepare_mmio_access(mr);
3435 #if defined(TARGET_WORDS_BIGENDIAN)
3436 if (endian == DEVICE_LITTLE_ENDIAN) {
3437 val = bswap32(val);
3439 #else
3440 if (endian == DEVICE_BIG_ENDIAN) {
3441 val = bswap32(val);
3443 #endif
3444 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3445 } else {
3446 /* RAM case */
3447 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3448 switch (endian) {
3449 case DEVICE_LITTLE_ENDIAN:
3450 stl_le_p(ptr, val);
3451 break;
3452 case DEVICE_BIG_ENDIAN:
3453 stl_be_p(ptr, val);
3454 break;
3455 default:
3456 stl_p(ptr, val);
3457 break;
3459 invalidate_and_set_dirty(mr, addr1, 4);
3460 r = MEMTX_OK;
3462 if (result) {
3463 *result = r;
3465 if (release_lock) {
3466 qemu_mutex_unlock_iothread();
3468 rcu_read_unlock();
3471 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3472 MemTxAttrs attrs, MemTxResult *result)
3474 address_space_stl_internal(as, addr, val, attrs, result,
3475 DEVICE_NATIVE_ENDIAN);
3478 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3479 MemTxAttrs attrs, MemTxResult *result)
3481 address_space_stl_internal(as, addr, val, attrs, result,
3482 DEVICE_LITTLE_ENDIAN);
3485 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3486 MemTxAttrs attrs, MemTxResult *result)
3488 address_space_stl_internal(as, addr, val, attrs, result,
3489 DEVICE_BIG_ENDIAN);
3492 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3494 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3497 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3499 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3502 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3504 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3507 /* XXX: optimize */
3508 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3509 MemTxAttrs attrs, MemTxResult *result)
3511 uint8_t v = val;
3512 MemTxResult r;
3514 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3515 if (result) {
3516 *result = r;
3520 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3522 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3525 /* warning: addr must be aligned */
3526 static inline void address_space_stw_internal(AddressSpace *as,
3527 hwaddr addr, uint32_t val,
3528 MemTxAttrs attrs,
3529 MemTxResult *result,
3530 enum device_endian endian)
3532 uint8_t *ptr;
3533 MemoryRegion *mr;
3534 hwaddr l = 2;
3535 hwaddr addr1;
3536 MemTxResult r;
3537 bool release_lock = false;
3539 rcu_read_lock();
3540 mr = address_space_translate(as, addr, &addr1, &l, true);
3541 if (l < 2 || !memory_access_is_direct(mr, true)) {
3542 release_lock |= prepare_mmio_access(mr);
3544 #if defined(TARGET_WORDS_BIGENDIAN)
3545 if (endian == DEVICE_LITTLE_ENDIAN) {
3546 val = bswap16(val);
3548 #else
3549 if (endian == DEVICE_BIG_ENDIAN) {
3550 val = bswap16(val);
3552 #endif
3553 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3554 } else {
3555 /* RAM case */
3556 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3557 switch (endian) {
3558 case DEVICE_LITTLE_ENDIAN:
3559 stw_le_p(ptr, val);
3560 break;
3561 case DEVICE_BIG_ENDIAN:
3562 stw_be_p(ptr, val);
3563 break;
3564 default:
3565 stw_p(ptr, val);
3566 break;
3568 invalidate_and_set_dirty(mr, addr1, 2);
3569 r = MEMTX_OK;
3571 if (result) {
3572 *result = r;
3574 if (release_lock) {
3575 qemu_mutex_unlock_iothread();
3577 rcu_read_unlock();
3580 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3581 MemTxAttrs attrs, MemTxResult *result)
3583 address_space_stw_internal(as, addr, val, attrs, result,
3584 DEVICE_NATIVE_ENDIAN);
3587 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3588 MemTxAttrs attrs, MemTxResult *result)
3590 address_space_stw_internal(as, addr, val, attrs, result,
3591 DEVICE_LITTLE_ENDIAN);
3594 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3595 MemTxAttrs attrs, MemTxResult *result)
3597 address_space_stw_internal(as, addr, val, attrs, result,
3598 DEVICE_BIG_ENDIAN);
3601 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3603 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3606 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3608 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3611 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3613 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3616 /* XXX: optimize */
3617 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3618 MemTxAttrs attrs, MemTxResult *result)
3620 MemTxResult r;
3621 val = tswap64(val);
3622 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3623 if (result) {
3624 *result = r;
3628 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3629 MemTxAttrs attrs, MemTxResult *result)
3631 MemTxResult r;
3632 val = cpu_to_le64(val);
3633 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3634 if (result) {
3635 *result = r;
3638 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3639 MemTxAttrs attrs, MemTxResult *result)
3641 MemTxResult r;
3642 val = cpu_to_be64(val);
3643 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3644 if (result) {
3645 *result = r;
3649 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3651 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3654 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3656 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3659 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3661 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3664 /* virtual memory access for debug (includes writing to ROM) */
3665 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3666 uint8_t *buf, int len, int is_write)
3668 int l;
3669 hwaddr phys_addr;
3670 target_ulong page;
3672 while (len > 0) {
3673 int asidx;
3674 MemTxAttrs attrs;
3676 page = addr & TARGET_PAGE_MASK;
3677 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3678 asidx = cpu_asidx_from_attrs(cpu, attrs);
3679 /* if no physical page mapped, return an error */
3680 if (phys_addr == -1)
3681 return -1;
3682 l = (page + TARGET_PAGE_SIZE) - addr;
3683 if (l > len)
3684 l = len;
3685 phys_addr += (addr & ~TARGET_PAGE_MASK);
3686 if (is_write) {
3687 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3688 phys_addr, buf, l);
3689 } else {
3690 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3691 MEMTXATTRS_UNSPECIFIED,
3692 buf, l, 0);
3694 len -= l;
3695 buf += l;
3696 addr += l;
3698 return 0;
3702 * Allows code that needs to deal with migration bitmaps etc to still be built
3703 * target independent.
3705 size_t qemu_target_page_bits(void)
3707 return TARGET_PAGE_BITS;
3710 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
    bool big_endian;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#else
    big_endian = false;
#endif
    return big_endian;
}
3726 #ifndef CONFIG_USER_ONLY
3727 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3729 MemoryRegion*mr;
3730 hwaddr l = 1;
3731 bool res;
3733 rcu_read_lock();
3734 mr = address_space_translate(&address_space_memory,
3735 phys_addr, &phys_addr, &l, false);
3737 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3738 rcu_read_unlock();
3739 return res;
3742 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3744 RAMBlock *block;
3745 int ret = 0;
3747 rcu_read_lock();
3748 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3749 ret = func(block->idstr, block->host, block->offset,
3750 block->used_length, opaque);
3751 if (ret) {
3752 break;
3755 rcu_read_unlock();
3756 return ret;
3758 #endif