migration: rename unix.c to socket.c
[qemu/ar7.git] / exec.c
bloba3a93aeed38486ea49f339995ab6529e1f1daa41
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #include <sys/mman.h>
23 #endif
25 #include "qemu/cutils.h"
26 #include "cpu.h"
27 #include "exec/exec-all.h"
28 #include "tcg.h"
29 #include "hw/qdev-core.h"
30 #if !defined(CONFIG_USER_ONLY)
31 #include "hw/boards.h"
32 #include "hw/xen/xen.h"
33 #endif
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include <qemu.h>
41 #else /* !CONFIG_USER_ONLY */
42 #include "hw/hw.h"
43 #include "exec/memory.h"
44 #include "exec/ioport.h"
45 #include "sysemu/dma.h"
46 #include "exec/address-spaces.h"
47 #include "sysemu/xen-mapcache.h"
48 #include "trace.h"
49 #endif
50 #include "exec/cpu-all.h"
51 #include "qemu/rcu_queue.h"
52 #include "qemu/main-loop.h"
53 #include "translate-all.h"
54 #include "sysemu/replay.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
58 #include "exec/log.h"
60 #include "qemu/range.h"
61 #ifndef _WIN32
62 #include "qemu/mmap-alloc.h"
63 #endif
65 //#define DEBUG_SUBPAGE
67 #if !defined(CONFIG_USER_ONLY)
68 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
69 * are protected by the ramlist lock.
71 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
73 static MemoryRegion *system_memory;
74 static MemoryRegion *system_io;
76 AddressSpace address_space_io;
77 AddressSpace address_space_memory;
79 MemoryRegion io_mem_rom, io_mem_notdirty;
80 static MemoryRegion io_mem_unassigned;
82 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
83 #define RAM_PREALLOC (1 << 0)
85 /* RAM is mmap-ed with MAP_SHARED */
86 #define RAM_SHARED (1 << 1)
88 /* Only a portion of RAM (used_length) is actually used, and migrated.
89 * This used_length size can change across reboots.
91 #define RAM_RESIZEABLE (1 << 2)
93 #endif
95 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
96 /* current CPU in the current thread. It is only valid inside
97 cpu_exec() */
98 __thread CPUState *current_cpu;
99 /* 0 = Do not count executed instructions.
100 1 = Precise instruction counting.
101 2 = Adaptive rate instruction counting. */
102 int use_icount;
104 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the physical page radix tree, packed into 32 bits. */
struct PhysPageEntry {
    /* Number of levels to skip to reach the next node (in units of
     * L2_SIZE).  0 marks a leaf.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Interior (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};

/* All-ones value of the 26-bit ptr field: "no node allocated yet". */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

/* Levels needed to cover every page number of the address space. */
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

/* A tree node is a table of P_L2_SIZE entries. */
typedef PhysPageEntry Node[P_L2_SIZE];
127 typedef struct PhysPageMap {
128 struct rcu_head rcu;
130 unsigned sections_nb;
131 unsigned sections_nb_alloc;
132 unsigned nodes_nb;
133 unsigned nodes_nb_alloc;
134 Node *nodes;
135 MemoryRegionSection *sections;
136 } PhysPageMap;
138 struct AddressSpaceDispatch {
139 struct rcu_head rcu;
141 MemoryRegionSection *mru_section;
142 /* This is a multi-level map on the physical address space.
143 * The bottom level has pointers to MemoryRegionSections.
145 PhysPageEntry phys_map;
146 PhysPageMap map;
147 AddressSpace *as;
150 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
151 typedef struct subpage_t {
152 MemoryRegion iomem;
153 AddressSpace *as;
154 hwaddr base;
155 uint16_t sub_section[TARGET_PAGE_SIZE];
156 } subpage_t;
158 #define PHYS_SECTION_UNASSIGNED 0
159 #define PHYS_SECTION_NOTDIRTY 1
160 #define PHYS_SECTION_ROM 2
161 #define PHYS_SECTION_WATCH 3
163 static void io_mem_init(void);
164 static void memory_map_init(void);
165 static void tcg_commit(MemoryListener *listener);
167 static MemoryRegion io_mem_watch;
170 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
171 * @cpu: the CPU whose AddressSpace this is
172 * @as: the AddressSpace itself
173 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
174 * @tcg_as_listener: listener for tracking changes to the AddressSpace
176 struct CPUAddressSpace {
177 CPUState *cpu;
178 AddressSpace *as;
179 struct AddressSpaceDispatch *memory_dispatch;
180 MemoryListener tcg_as_listener;
183 #endif
185 #if !defined(CONFIG_USER_ONLY)
187 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
189 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
190 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
191 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
192 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
196 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
198 unsigned i;
199 uint32_t ret;
200 PhysPageEntry e;
201 PhysPageEntry *p;
203 ret = map->nodes_nb++;
204 p = map->nodes[ret];
205 assert(ret != PHYS_MAP_NODE_NIL);
206 assert(ret != map->nodes_nb_alloc);
208 e.skip = leaf ? 0 : 1;
209 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
210 for (i = 0; i < P_L2_SIZE; ++i) {
211 memcpy(&p[i], &e, sizeof(e));
213 return ret;
216 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
217 hwaddr *index, hwaddr *nb, uint16_t leaf,
218 int level)
220 PhysPageEntry *p;
221 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
223 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
224 lp->ptr = phys_map_node_alloc(map, level == 0);
226 p = map->nodes[lp->ptr];
227 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
229 while (*nb && lp < &p[P_L2_SIZE]) {
230 if ((*index & (step - 1)) == 0 && *nb >= step) {
231 lp->skip = 0;
232 lp->ptr = leaf;
233 *index += step;
234 *nb -= step;
235 } else {
236 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
238 ++lp;
242 static void phys_page_set(AddressSpaceDispatch *d,
243 hwaddr index, hwaddr nb,
244 uint16_t leaf)
246 /* Wildly overreserve - it doesn't matter much. */
247 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
249 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
252 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
253 * and update our entry so we can skip it and go directly to the destination.
255 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
257 unsigned valid_ptr = P_L2_SIZE;
258 int valid = 0;
259 PhysPageEntry *p;
260 int i;
262 if (lp->ptr == PHYS_MAP_NODE_NIL) {
263 return;
266 p = nodes[lp->ptr];
267 for (i = 0; i < P_L2_SIZE; i++) {
268 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
269 continue;
272 valid_ptr = i;
273 valid++;
274 if (p[i].skip) {
275 phys_page_compact(&p[i], nodes, compacted);
279 /* We can only compress if there's only one child. */
280 if (valid != 1) {
281 return;
284 assert(valid_ptr < P_L2_SIZE);
286 /* Don't compress if it won't fit in the # of bits we have. */
287 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
288 return;
291 lp->ptr = p[valid_ptr].ptr;
292 if (!p[valid_ptr].skip) {
293 /* If our only child is a leaf, make this a leaf. */
294 /* By design, we should have made this node a leaf to begin with so we
295 * should never reach here.
296 * But since it's so simple to handle this, let's do it just in case we
297 * change this rule.
299 lp->skip = 0;
300 } else {
301 lp->skip += p[valid_ptr].skip;
305 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
307 DECLARE_BITMAP(compacted, nodes_nb);
309 if (d->phys_map.skip) {
310 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
314 static inline bool section_covers_addr(const MemoryRegionSection *section,
315 hwaddr addr)
317 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
318 * the section must cover the entire address space.
320 return section->size.hi ||
321 range_covers_byte(section->offset_within_address_space,
322 section->size.lo, addr);
325 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
326 Node *nodes, MemoryRegionSection *sections)
328 PhysPageEntry *p;
329 hwaddr index = addr >> TARGET_PAGE_BITS;
330 int i;
332 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
333 if (lp.ptr == PHYS_MAP_NODE_NIL) {
334 return &sections[PHYS_SECTION_UNASSIGNED];
336 p = nodes[lp.ptr];
337 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
340 if (section_covers_addr(&sections[lp.ptr], addr)) {
341 return &sections[lp.ptr];
342 } else {
343 return &sections[PHYS_SECTION_UNASSIGNED];
347 bool memory_region_is_unassigned(MemoryRegion *mr)
349 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
350 && mr != &io_mem_watch;
353 /* Called from RCU critical section */
354 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
355 hwaddr addr,
356 bool resolve_subpage)
358 MemoryRegionSection *section = atomic_read(&d->mru_section);
359 subpage_t *subpage;
360 bool update;
362 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
363 section_covers_addr(section, addr)) {
364 update = false;
365 } else {
366 section = phys_page_find(d->phys_map, addr, d->map.nodes,
367 d->map.sections);
368 update = true;
370 if (resolve_subpage && section->mr->subpage) {
371 subpage = container_of(section->mr, subpage_t, iomem);
372 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
374 if (update) {
375 atomic_set(&d->mru_section, section);
377 return section;
380 /* Called from RCU critical section */
381 static MemoryRegionSection *
382 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
383 hwaddr *plen, bool resolve_subpage)
385 MemoryRegionSection *section;
386 MemoryRegion *mr;
387 Int128 diff;
389 section = address_space_lookup_region(d, addr, resolve_subpage);
390 /* Compute offset within MemoryRegionSection */
391 addr -= section->offset_within_address_space;
393 /* Compute offset within MemoryRegion */
394 *xlat = addr + section->offset_within_region;
396 mr = section->mr;
398 /* MMIO registers can be expected to perform full-width accesses based only
399 * on their address, without considering adjacent registers that could
400 * decode to completely different MemoryRegions. When such registers
401 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
402 * regions overlap wildly. For this reason we cannot clamp the accesses
403 * here.
405 * If the length is small (as is the case for address_space_ldl/stl),
406 * everything works fine. If the incoming length is large, however,
407 * the caller really has to do the clamping through memory_access_size.
409 if (memory_region_is_ram(mr)) {
410 diff = int128_sub(section->size, int128_make64(addr));
411 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
413 return section;
416 /* Called from RCU critical section */
417 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
418 hwaddr *xlat, hwaddr *plen,
419 bool is_write)
421 IOMMUTLBEntry iotlb;
422 MemoryRegionSection *section;
423 MemoryRegion *mr;
425 for (;;) {
426 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
427 section = address_space_translate_internal(d, addr, &addr, plen, true);
428 mr = section->mr;
430 if (!mr->iommu_ops) {
431 break;
434 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
435 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
436 | (addr & iotlb.addr_mask));
437 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
438 if (!(iotlb.perm & (1 << is_write))) {
439 mr = &io_mem_unassigned;
440 break;
443 as = iotlb.target_as;
446 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
447 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
448 *plen = MIN(page, *plen);
451 *xlat = addr;
452 return mr;
455 /* Called from RCU critical section */
456 MemoryRegionSection *
457 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
458 hwaddr *xlat, hwaddr *plen)
460 MemoryRegionSection *section;
461 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
463 section = address_space_translate_internal(d, addr, xlat, plen, false);
465 assert(!section->mr->iommu_ops);
466 return section;
468 #endif
470 #if !defined(CONFIG_USER_ONLY)
472 static int cpu_common_post_load(void *opaque, int version_id)
474 CPUState *cpu = opaque;
476 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
477 version_id is increased. */
478 cpu->interrupt_request &= ~0x01;
479 tlb_flush(cpu, 1);
481 return 0;
484 static int cpu_common_pre_load(void *opaque)
486 CPUState *cpu = opaque;
488 cpu->exception_index = -1;
490 return 0;
493 static bool cpu_common_exception_index_needed(void *opaque)
495 CPUState *cpu = opaque;
497 return tcg_enabled() && cpu->exception_index != -1;
500 static const VMStateDescription vmstate_cpu_common_exception_index = {
501 .name = "cpu_common/exception_index",
502 .version_id = 1,
503 .minimum_version_id = 1,
504 .needed = cpu_common_exception_index_needed,
505 .fields = (VMStateField[]) {
506 VMSTATE_INT32(exception_index, CPUState),
507 VMSTATE_END_OF_LIST()
511 static bool cpu_common_crash_occurred_needed(void *opaque)
513 CPUState *cpu = opaque;
515 return cpu->crash_occurred;
518 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
519 .name = "cpu_common/crash_occurred",
520 .version_id = 1,
521 .minimum_version_id = 1,
522 .needed = cpu_common_crash_occurred_needed,
523 .fields = (VMStateField[]) {
524 VMSTATE_BOOL(crash_occurred, CPUState),
525 VMSTATE_END_OF_LIST()
529 const VMStateDescription vmstate_cpu_common = {
530 .name = "cpu_common",
531 .version_id = 1,
532 .minimum_version_id = 1,
533 .pre_load = cpu_common_pre_load,
534 .post_load = cpu_common_post_load,
535 .fields = (VMStateField[]) {
536 VMSTATE_UINT32(halted, CPUState),
537 VMSTATE_UINT32(interrupt_request, CPUState),
538 VMSTATE_END_OF_LIST()
540 .subsections = (const VMStateDescription*[]) {
541 &vmstate_cpu_common_exception_index,
542 &vmstate_cpu_common_crash_occurred,
543 NULL
547 #endif
549 CPUState *qemu_get_cpu(int index)
551 CPUState *cpu;
553 CPU_FOREACH(cpu) {
554 if (cpu->cpu_index == index) {
555 return cpu;
559 return NULL;
562 #if !defined(CONFIG_USER_ONLY)
563 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
565 CPUAddressSpace *newas;
567 /* Target code should have set num_ases before calling us */
568 assert(asidx < cpu->num_ases);
570 if (asidx == 0) {
571 /* address space 0 gets the convenience alias */
572 cpu->as = as;
575 /* KVM cannot currently support multiple address spaces. */
576 assert(asidx == 0 || !kvm_enabled());
578 if (!cpu->cpu_ases) {
579 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
582 newas = &cpu->cpu_ases[asidx];
583 newas->cpu = cpu;
584 newas->as = as;
585 if (tcg_enabled()) {
586 newas->tcg_as_listener.commit = tcg_commit;
587 memory_listener_register(&newas->tcg_as_listener, as);
591 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
593 /* Return the AddressSpace corresponding to the specified index */
594 return cpu->cpu_ases[asidx].as;
596 #endif
598 #ifndef CONFIG_USER_ONLY
599 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
601 static int cpu_get_free_index(Error **errp)
603 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
605 if (cpu >= MAX_CPUMASK_BITS) {
606 error_setg(errp, "Trying to use more CPUs than max of %d",
607 MAX_CPUMASK_BITS);
608 return -1;
611 bitmap_set(cpu_index_map, cpu, 1);
612 return cpu;
615 void cpu_exec_exit(CPUState *cpu)
617 if (cpu->cpu_index == -1) {
618 /* cpu_index was never allocated by this @cpu or was already freed. */
619 return;
622 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
623 cpu->cpu_index = -1;
625 #else
627 static int cpu_get_free_index(Error **errp)
629 CPUState *some_cpu;
630 int cpu_index = 0;
632 CPU_FOREACH(some_cpu) {
633 cpu_index++;
635 return cpu_index;
638 void cpu_exec_exit(CPUState *cpu)
641 #endif
643 void cpu_exec_init(CPUState *cpu, Error **errp)
645 CPUClass *cc = CPU_GET_CLASS(cpu);
646 Error *local_err = NULL;
648 cpu->as = NULL;
649 cpu->num_ases = 0;
651 #ifndef CONFIG_USER_ONLY
652 cpu->thread_id = qemu_get_thread_id();
654 /* This is a softmmu CPU object, so create a property for it
655 * so users can wire up its memory. (This can't go in qom/cpu.c
656 * because that file is compiled only once for both user-mode
657 * and system builds.) The default if no link is set up is to use
658 * the system address space.
660 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
661 (Object **)&cpu->memory,
662 qdev_prop_allow_set_link_before_realize,
663 OBJ_PROP_LINK_UNREF_ON_RELEASE,
664 &error_abort);
665 cpu->memory = system_memory;
666 object_ref(OBJECT(cpu->memory));
667 #endif
669 #if defined(CONFIG_USER_ONLY)
670 cpu_list_lock();
671 #endif
672 cpu->cpu_index = cpu_get_free_index(&local_err);
673 if (local_err) {
674 error_propagate(errp, local_err);
675 #if defined(CONFIG_USER_ONLY)
676 cpu_list_unlock();
677 #endif
678 return;
680 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
681 #if defined(CONFIG_USER_ONLY)
682 (void) cc;
683 cpu_list_unlock();
684 #else
685 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
686 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
688 if (cc->vmsd != NULL) {
689 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
691 #endif
694 #if defined(CONFIG_USER_ONLY)
695 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
697 tb_invalidate_phys_page_range(pc, pc + 1, 0);
699 #else
700 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
702 MemTxAttrs attrs;
703 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
704 int asidx = cpu_asidx_from_attrs(cpu, attrs);
705 if (phys != -1) {
706 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
707 phys | (pc & ~TARGET_PAGE_MASK));
710 #endif
712 #if defined(CONFIG_USER_ONLY)
713 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
718 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
719 int flags)
721 return -ENOSYS;
724 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
728 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
729 int flags, CPUWatchpoint **watchpoint)
731 return -ENOSYS;
733 #else
734 /* Add a watchpoint. */
735 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
736 int flags, CPUWatchpoint **watchpoint)
738 CPUWatchpoint *wp;
740 /* forbid ranges which are empty or run off the end of the address space */
741 if (len == 0 || (addr + len - 1) < addr) {
742 error_report("tried to set invalid watchpoint at %"
743 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
744 return -EINVAL;
746 wp = g_malloc(sizeof(*wp));
748 wp->vaddr = addr;
749 wp->len = len;
750 wp->flags = flags;
752 /* keep all GDB-injected watchpoints in front */
753 if (flags & BP_GDB) {
754 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
755 } else {
756 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
759 tlb_flush_page(cpu, addr);
761 if (watchpoint)
762 *watchpoint = wp;
763 return 0;
766 /* Remove a specific watchpoint. */
767 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
768 int flags)
770 CPUWatchpoint *wp;
772 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
773 if (addr == wp->vaddr && len == wp->len
774 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
775 cpu_watchpoint_remove_by_ref(cpu, wp);
776 return 0;
779 return -ENOENT;
782 /* Remove a specific watchpoint by reference. */
783 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
785 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
787 tlb_flush_page(cpu, watchpoint->vaddr);
789 g_free(watchpoint);
792 /* Remove all matching watchpoints. */
793 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
795 CPUWatchpoint *wp, *next;
797 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
798 if (wp->flags & mask) {
799 cpu_watchpoint_remove_by_ref(cpu, wp);
804 /* Return true if this watchpoint address matches the specified
805 * access (ie the address range covered by the watchpoint overlaps
806 * partially or completely with the address range covered by the
807 * access).
809 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
810 vaddr addr,
811 vaddr len)
813 /* We know the lengths are non-zero, but a little caution is
814 * required to avoid errors in the case where the range ends
815 * exactly at the top of the address space and so addr + len
816 * wraps round to zero.
818 vaddr wpend = wp->vaddr + wp->len - 1;
819 vaddr addrend = addr + len - 1;
821 return !(addr > wpend || wp->vaddr > addrend);
824 #endif
826 /* Add a breakpoint. */
827 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
828 CPUBreakpoint **breakpoint)
830 CPUBreakpoint *bp;
832 bp = g_malloc(sizeof(*bp));
834 bp->pc = pc;
835 bp->flags = flags;
837 /* keep all GDB-injected breakpoints in front */
838 if (flags & BP_GDB) {
839 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
840 } else {
841 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
844 breakpoint_invalidate(cpu, pc);
846 if (breakpoint) {
847 *breakpoint = bp;
849 return 0;
852 /* Remove a specific breakpoint. */
853 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
855 CPUBreakpoint *bp;
857 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
858 if (bp->pc == pc && bp->flags == flags) {
859 cpu_breakpoint_remove_by_ref(cpu, bp);
860 return 0;
863 return -ENOENT;
866 /* Remove a specific breakpoint by reference. */
867 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
869 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
871 breakpoint_invalidate(cpu, breakpoint->pc);
873 g_free(breakpoint);
876 /* Remove all matching breakpoints. */
877 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
879 CPUBreakpoint *bp, *next;
881 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
882 if (bp->flags & mask) {
883 cpu_breakpoint_remove_by_ref(cpu, bp);
888 /* enable or disable single step mode. EXCP_DEBUG is returned by the
889 CPU loop after each instruction */
890 void cpu_single_step(CPUState *cpu, int enabled)
892 if (cpu->singlestep_enabled != enabled) {
893 cpu->singlestep_enabled = enabled;
894 if (kvm_enabled()) {
895 kvm_update_guest_debug(cpu, 0);
896 } else {
897 /* must flush all the translated code to avoid inconsistencies */
898 /* XXX: only flush what is necessary */
899 tb_flush(cpu);
904 void cpu_abort(CPUState *cpu, const char *fmt, ...)
906 va_list ap;
907 va_list ap2;
909 va_start(ap, fmt);
910 va_copy(ap2, ap);
911 fprintf(stderr, "qemu: fatal: ");
912 vfprintf(stderr, fmt, ap);
913 fprintf(stderr, "\n");
914 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
915 if (qemu_log_separate()) {
916 qemu_log("qemu: fatal: ");
917 qemu_log_vprintf(fmt, ap2);
918 qemu_log("\n");
919 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
920 qemu_log_flush();
921 qemu_log_close();
923 va_end(ap2);
924 va_end(ap);
925 replay_finish();
926 #if defined(CONFIG_USER_ONLY)
928 struct sigaction act;
929 sigfillset(&act.sa_mask);
930 act.sa_handler = SIG_DFL;
931 sigaction(SIGABRT, &act, NULL);
933 #endif
934 abort();
937 #if !defined(CONFIG_USER_ONLY)
938 /* Called from RCU critical section */
939 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
941 RAMBlock *block;
943 block = atomic_rcu_read(&ram_list.mru_block);
944 if (block && addr - block->offset < block->max_length) {
945 return block;
947 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
948 if (addr - block->offset < block->max_length) {
949 goto found;
953 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
954 abort();
956 found:
957 /* It is safe to write mru_block outside the iothread lock. This
958 * is what happens:
960 * mru_block = xxx
961 * rcu_read_unlock()
962 * xxx removed from list
963 * rcu_read_lock()
964 * read mru_block
965 * mru_block = NULL;
966 * call_rcu(reclaim_ramblock, xxx);
967 * rcu_read_unlock()
969 * atomic_rcu_set is not needed here. The block was already published
970 * when it was placed into the list. Here we're just making an extra
971 * copy of the pointer.
973 ram_list.mru_block = block;
974 return block;
977 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
979 CPUState *cpu;
980 ram_addr_t start1;
981 RAMBlock *block;
982 ram_addr_t end;
984 end = TARGET_PAGE_ALIGN(start + length);
985 start &= TARGET_PAGE_MASK;
987 rcu_read_lock();
988 block = qemu_get_ram_block(start);
989 assert(block == qemu_get_ram_block(end - 1));
990 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
991 CPU_FOREACH(cpu) {
992 tlb_reset_dirty(cpu, start1, length);
994 rcu_read_unlock();
997 /* Note: start and end must be within the same ram block. */
998 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
999 ram_addr_t length,
1000 unsigned client)
1002 DirtyMemoryBlocks *blocks;
1003 unsigned long end, page;
1004 bool dirty = false;
1006 if (length == 0) {
1007 return false;
1010 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1011 page = start >> TARGET_PAGE_BITS;
1013 rcu_read_lock();
1015 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1017 while (page < end) {
1018 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1019 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1020 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1022 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1023 offset, num);
1024 page += num;
1027 rcu_read_unlock();
1029 if (dirty && tcg_enabled()) {
1030 tlb_reset_dirty_range_all(start, length);
1033 return dirty;
1036 /* Called from RCU critical section */
1037 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1038 MemoryRegionSection *section,
1039 target_ulong vaddr,
1040 hwaddr paddr, hwaddr xlat,
1041 int prot,
1042 target_ulong *address)
1044 hwaddr iotlb;
1045 CPUWatchpoint *wp;
1047 if (memory_region_is_ram(section->mr)) {
1048 /* Normal RAM. */
1049 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1050 if (!section->readonly) {
1051 iotlb |= PHYS_SECTION_NOTDIRTY;
1052 } else {
1053 iotlb |= PHYS_SECTION_ROM;
1055 } else {
1056 AddressSpaceDispatch *d;
1058 d = atomic_rcu_read(&section->address_space->dispatch);
1059 iotlb = section - d->map.sections;
1060 iotlb += xlat;
1063 /* Make accesses to pages with watchpoints go via the
1064 watchpoint trap routines. */
1065 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1066 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1067 /* Avoid trapping reads of pages with a write breakpoint. */
1068 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1069 iotlb = PHYS_SECTION_WATCH + paddr;
1070 *address |= TLB_MMIO;
1071 break;
1076 return iotlb;
#endif /* !defined(CONFIG_USER_ONLY) */
1080 #if !defined(CONFIG_USER_ONLY)
1082 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1083 uint16_t section);
1084 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1086 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1087 qemu_anon_ram_alloc;
1090 * Set a custom physical guest memory alloator.
1091 * Accelerators with unusual needs may need this. Hopefully, we can
1092 * get rid of it eventually.
1094 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1096 phys_mem_alloc = alloc;
1099 static uint16_t phys_section_add(PhysPageMap *map,
1100 MemoryRegionSection *section)
1102 /* The physical section number is ORed with a page-aligned
1103 * pointer to produce the iotlb entries. Thus it should
1104 * never overflow into the page-aligned value.
1106 assert(map->sections_nb < TARGET_PAGE_SIZE);
1108 if (map->sections_nb == map->sections_nb_alloc) {
1109 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1110 map->sections = g_renew(MemoryRegionSection, map->sections,
1111 map->sections_nb_alloc);
1113 map->sections[map->sections_nb] = *section;
1114 memory_region_ref(section->mr);
1115 return map->sections_nb++;
1118 static void phys_section_destroy(MemoryRegion *mr)
1120 bool have_sub_page = mr->subpage;
1122 memory_region_unref(mr);
1124 if (have_sub_page) {
1125 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1126 object_unref(OBJECT(&subpage->iomem));
1127 g_free(subpage);
1131 static void phys_sections_free(PhysPageMap *map)
1133 while (map->sections_nb > 0) {
1134 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1135 phys_section_destroy(section->mr);
1137 g_free(map->sections);
1138 g_free(map->nodes);
1141 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1143 subpage_t *subpage;
1144 hwaddr base = section->offset_within_address_space
1145 & TARGET_PAGE_MASK;
1146 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1147 d->map.nodes, d->map.sections);
1148 MemoryRegionSection subsection = {
1149 .offset_within_address_space = base,
1150 .size = int128_make64(TARGET_PAGE_SIZE),
1152 hwaddr start, end;
1154 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1156 if (!(existing->mr->subpage)) {
1157 subpage = subpage_init(d->as, base);
1158 subsection.address_space = d->as;
1159 subsection.mr = &subpage->iomem;
1160 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1161 phys_section_add(&d->map, &subsection));
1162 } else {
1163 subpage = container_of(existing->mr, subpage_t, iomem);
1165 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1166 end = start + int128_get64(section->size) - 1;
1167 subpage_register(subpage, start, end,
1168 phys_section_add(&d->map, section));
1172 static void register_multipage(AddressSpaceDispatch *d,
1173 MemoryRegionSection *section)
1175 hwaddr start_addr = section->offset_within_address_space;
1176 uint16_t section_index = phys_section_add(&d->map, section);
1177 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1178 TARGET_PAGE_BITS));
1180 assert(num_pages);
1181 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1184 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1186 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1187 AddressSpaceDispatch *d = as->next_dispatch;
1188 MemoryRegionSection now = *section, remain = *section;
1189 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1191 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1192 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1193 - now.offset_within_address_space;
1195 now.size = int128_min(int128_make64(left), now.size);
1196 register_subpage(d, &now);
1197 } else {
1198 now.size = int128_zero();
1200 while (int128_ne(remain.size, now.size)) {
1201 remain.size = int128_sub(remain.size, now.size);
1202 remain.offset_within_address_space += int128_get64(now.size);
1203 remain.offset_within_region += int128_get64(now.size);
1204 now = remain;
1205 if (int128_lt(remain.size, page_size)) {
1206 register_subpage(d, &now);
1207 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1208 now.size = page_size;
1209 register_subpage(d, &now);
1210 } else {
1211 now.size = int128_and(now.size, int128_neg(page_size));
1212 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; only does anything when KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1223 void qemu_mutex_lock_ramlist(void)
1225 qemu_mutex_lock(&ram_list.mutex);
1228 void qemu_mutex_unlock_ramlist(void)
1230 qemu_mutex_unlock(&ram_list.mutex);
1233 #ifdef __linux__
1234 static void *file_ram_alloc(RAMBlock *block,
1235 ram_addr_t memory,
1236 const char *path,
1237 Error **errp)
1239 bool unlink_on_error = false;
1240 char *filename;
1241 char *sanitized_name;
1242 char *c;
1243 void *area;
1244 int fd = -1;
1245 int64_t page_size;
1247 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1248 error_setg(errp,
1249 "host lacks kvm mmu notifiers, -mem-path unsupported");
1250 return NULL;
1253 for (;;) {
1254 fd = open(path, O_RDWR);
1255 if (fd >= 0) {
1256 /* @path names an existing file, use it */
1257 break;
1259 if (errno == ENOENT) {
1260 /* @path names a file that doesn't exist, create it */
1261 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1262 if (fd >= 0) {
1263 unlink_on_error = true;
1264 break;
1266 } else if (errno == EISDIR) {
1267 /* @path names a directory, create a file there */
1268 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1269 sanitized_name = g_strdup(memory_region_name(block->mr));
1270 for (c = sanitized_name; *c != '\0'; c++) {
1271 if (*c == '/') {
1272 *c = '_';
1276 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1277 sanitized_name);
1278 g_free(sanitized_name);
1280 fd = mkstemp(filename);
1281 if (fd >= 0) {
1282 unlink(filename);
1283 g_free(filename);
1284 break;
1286 g_free(filename);
1288 if (errno != EEXIST && errno != EINTR) {
1289 error_setg_errno(errp, errno,
1290 "can't open backing store %s for guest RAM",
1291 path);
1292 goto error;
1295 * Try again on EINTR and EEXIST. The latter happens when
1296 * something else creates the file between our two open().
1300 page_size = qemu_fd_getpagesize(fd);
1301 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1303 if (memory < page_size) {
1304 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1305 "or larger than page size 0x%" PRIx64,
1306 memory, page_size);
1307 goto error;
1310 memory = ROUND_UP(memory, page_size);
1313 * ftruncate is not supported by hugetlbfs in older
1314 * hosts, so don't bother bailing out on errors.
1315 * If anything goes wrong with it under other filesystems,
1316 * mmap will fail.
1318 if (ftruncate(fd, memory)) {
1319 perror("ftruncate");
1322 area = qemu_ram_mmap(fd, memory, block->mr->align,
1323 block->flags & RAM_SHARED);
1324 if (area == MAP_FAILED) {
1325 error_setg_errno(errp, errno,
1326 "unable to map backing store for guest RAM");
1327 goto error;
1330 if (mem_prealloc) {
1331 os_mem_prealloc(fd, area, memory);
1334 block->fd = fd;
1335 return area;
1337 error:
1338 if (unlink_on_error) {
1339 unlink(path);
1341 if (fd != -1) {
1342 close(fd);
1344 return NULL;
1346 #endif
1348 /* Called with the ramlist lock held. */
1349 static ram_addr_t find_ram_offset(ram_addr_t size)
1351 RAMBlock *block, *next_block;
1352 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1354 assert(size != 0); /* it would hand out same offset multiple times */
1356 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1357 return 0;
1360 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1361 ram_addr_t end, next = RAM_ADDR_MAX;
1363 end = block->offset + block->max_length;
1365 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1366 if (next_block->offset >= end) {
1367 next = MIN(next, next_block->offset);
1370 if (next - end >= size && next - end < mingap) {
1371 offset = end;
1372 mingap = next - end;
1376 if (offset == RAM_ADDR_MAX) {
1377 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1378 (uint64_t)size);
1379 abort();
1382 return offset;
1385 ram_addr_t last_ram_offset(void)
1387 RAMBlock *block;
1388 ram_addr_t last = 0;
1390 rcu_read_lock();
1391 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1392 last = MAX(last, block->offset + block->max_length);
1394 rcu_read_unlock();
1395 return last;
1398 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1400 int ret;
1402 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1403 if (!machine_dump_guest_core(current_machine)) {
1404 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1405 if (ret) {
1406 perror("qemu_madvise");
1407 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1408 "but dump_guest_core=off specified\n");
1413 const char *qemu_ram_get_idstr(RAMBlock *rb)
1415 return rb->idstr;
1418 /* Called with iothread lock held. */
1419 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1421 RAMBlock *block;
1423 assert(new_block);
1424 assert(!new_block->idstr[0]);
1426 if (dev) {
1427 char *id = qdev_get_dev_path(dev);
1428 if (id) {
1429 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1430 g_free(id);
1433 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1435 rcu_read_lock();
1436 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1437 if (block != new_block &&
1438 !strcmp(block->idstr, new_block->idstr)) {
1439 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1440 new_block->idstr);
1441 abort();
1444 rcu_read_unlock();
1447 /* Called with iothread lock held. */
1448 void qemu_ram_unset_idstr(RAMBlock *block)
1450 /* FIXME: arch_init.c assumes that this is not called throughout
1451 * migration. Ignore the problem since hot-unplug during migration
1452 * does not work anyway.
1454 if (block) {
1455 memset(block->idstr, 0, sizeof(block->idstr));
1459 static int memory_try_enable_merging(void *addr, size_t len)
1461 if (!machine_mem_merge(current_machine)) {
1462 /* disabled by the user */
1463 return 0;
1466 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1469 /* Only legal before guest might have detected the memory size: e.g. on
1470 * incoming migration, or right after reset.
1472 * As memory core doesn't know how is memory accessed, it is up to
1473 * resize callback to update device state and/or add assertions to detect
1474 * misuse, if necessary.
1476 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1478 assert(block);
1480 newsize = HOST_PAGE_ALIGN(newsize);
1482 if (block->used_length == newsize) {
1483 return 0;
1486 if (!(block->flags & RAM_RESIZEABLE)) {
1487 error_setg_errno(errp, EINVAL,
1488 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1489 " in != 0x" RAM_ADDR_FMT, block->idstr,
1490 newsize, block->used_length);
1491 return -EINVAL;
1494 if (block->max_length < newsize) {
1495 error_setg_errno(errp, EINVAL,
1496 "Length too large: %s: 0x" RAM_ADDR_FMT
1497 " > 0x" RAM_ADDR_FMT, block->idstr,
1498 newsize, block->max_length);
1499 return -EINVAL;
1502 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1503 block->used_length = newsize;
1504 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1505 DIRTY_CLIENTS_ALL);
1506 memory_region_set_size(block->mr, newsize);
1507 if (block->resized) {
1508 block->resized(block->idstr, newsize, block->host);
1510 return 0;
1513 /* Called with ram_list.mutex held */
1514 static void dirty_memory_extend(ram_addr_t old_ram_size,
1515 ram_addr_t new_ram_size)
1517 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1518 DIRTY_MEMORY_BLOCK_SIZE);
1519 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1520 DIRTY_MEMORY_BLOCK_SIZE);
1521 int i;
1523 /* Only need to extend if block count increased */
1524 if (new_num_blocks <= old_num_blocks) {
1525 return;
1528 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1529 DirtyMemoryBlocks *old_blocks;
1530 DirtyMemoryBlocks *new_blocks;
1531 int j;
1533 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1534 new_blocks = g_malloc(sizeof(*new_blocks) +
1535 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1537 if (old_num_blocks) {
1538 memcpy(new_blocks->blocks, old_blocks->blocks,
1539 old_num_blocks * sizeof(old_blocks->blocks[0]));
1542 for (j = old_num_blocks; j < new_num_blocks; j++) {
1543 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1546 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1548 if (old_blocks) {
1549 g_free_rcu(old_blocks, rcu);
1554 static void ram_block_add(RAMBlock *new_block, Error **errp)
1556 RAMBlock *block;
1557 RAMBlock *last_block = NULL;
1558 ram_addr_t old_ram_size, new_ram_size;
1559 Error *err = NULL;
1561 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1563 qemu_mutex_lock_ramlist();
1564 new_block->offset = find_ram_offset(new_block->max_length);
1566 if (!new_block->host) {
1567 if (xen_enabled()) {
1568 xen_ram_alloc(new_block->offset, new_block->max_length,
1569 new_block->mr, &err);
1570 if (err) {
1571 error_propagate(errp, err);
1572 qemu_mutex_unlock_ramlist();
1573 return;
1575 } else {
1576 new_block->host = phys_mem_alloc(new_block->max_length,
1577 &new_block->mr->align);
1578 if (!new_block->host) {
1579 error_setg_errno(errp, errno,
1580 "cannot set up guest memory '%s'",
1581 memory_region_name(new_block->mr));
1582 qemu_mutex_unlock_ramlist();
1583 return;
1585 memory_try_enable_merging(new_block->host, new_block->max_length);
1589 new_ram_size = MAX(old_ram_size,
1590 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1591 if (new_ram_size > old_ram_size) {
1592 migration_bitmap_extend(old_ram_size, new_ram_size);
1593 dirty_memory_extend(old_ram_size, new_ram_size);
1595 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1596 * QLIST (which has an RCU-friendly variant) does not have insertion at
1597 * tail, so save the last element in last_block.
1599 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1600 last_block = block;
1601 if (block->max_length < new_block->max_length) {
1602 break;
1605 if (block) {
1606 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1607 } else if (last_block) {
1608 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1609 } else { /* list is empty */
1610 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1612 ram_list.mru_block = NULL;
1614 /* Write list before version */
1615 smp_wmb();
1616 ram_list.version++;
1617 qemu_mutex_unlock_ramlist();
1619 cpu_physical_memory_set_dirty_range(new_block->offset,
1620 new_block->used_length,
1621 DIRTY_CLIENTS_ALL);
1623 if (new_block->host) {
1624 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1625 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1626 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1627 if (kvm_enabled()) {
1628 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1633 #ifdef __linux__
1634 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1635 bool share, const char *mem_path,
1636 Error **errp)
1638 RAMBlock *new_block;
1639 Error *local_err = NULL;
1641 if (xen_enabled()) {
1642 error_setg(errp, "-mem-path not supported with Xen");
1643 return NULL;
1646 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1648 * file_ram_alloc() needs to allocate just like
1649 * phys_mem_alloc, but we haven't bothered to provide
1650 * a hook there.
1652 error_setg(errp,
1653 "-mem-path not supported with this accelerator");
1654 return NULL;
1657 size = HOST_PAGE_ALIGN(size);
1658 new_block = g_malloc0(sizeof(*new_block));
1659 new_block->mr = mr;
1660 new_block->used_length = size;
1661 new_block->max_length = size;
1662 new_block->flags = share ? RAM_SHARED : 0;
1663 new_block->host = file_ram_alloc(new_block, size,
1664 mem_path, errp);
1665 if (!new_block->host) {
1666 g_free(new_block);
1667 return NULL;
1670 ram_block_add(new_block, &local_err);
1671 if (local_err) {
1672 g_free(new_block);
1673 error_propagate(errp, local_err);
1674 return NULL;
1676 return new_block;
1678 #endif
1680 static
1681 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1682 void (*resized)(const char*,
1683 uint64_t length,
1684 void *host),
1685 void *host, bool resizeable,
1686 MemoryRegion *mr, Error **errp)
1688 RAMBlock *new_block;
1689 Error *local_err = NULL;
1691 size = HOST_PAGE_ALIGN(size);
1692 max_size = HOST_PAGE_ALIGN(max_size);
1693 new_block = g_malloc0(sizeof(*new_block));
1694 new_block->mr = mr;
1695 new_block->resized = resized;
1696 new_block->used_length = size;
1697 new_block->max_length = max_size;
1698 assert(max_size >= size);
1699 new_block->fd = -1;
1700 new_block->host = host;
1701 if (host) {
1702 new_block->flags |= RAM_PREALLOC;
1704 if (resizeable) {
1705 new_block->flags |= RAM_RESIZEABLE;
1707 ram_block_add(new_block, &local_err);
1708 if (local_err) {
1709 g_free(new_block);
1710 error_propagate(errp, local_err);
1711 return NULL;
1713 return new_block;
1716 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1717 MemoryRegion *mr, Error **errp)
1719 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1722 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1724 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1727 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1728 void (*resized)(const char*,
1729 uint64_t length,
1730 void *host),
1731 MemoryRegion *mr, Error **errp)
1733 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1736 static void reclaim_ramblock(RAMBlock *block)
1738 if (block->flags & RAM_PREALLOC) {
1740 } else if (xen_enabled()) {
1741 xen_invalidate_map_cache_entry(block->host);
1742 #ifndef _WIN32
1743 } else if (block->fd >= 0) {
1744 qemu_ram_munmap(block->host, block->max_length);
1745 close(block->fd);
1746 #endif
1747 } else {
1748 qemu_anon_ram_free(block->host, block->max_length);
1750 g_free(block);
1753 void qemu_ram_free(RAMBlock *block)
1755 if (!block) {
1756 return;
1759 qemu_mutex_lock_ramlist();
1760 QLIST_REMOVE_RCU(block, next);
1761 ram_list.mru_block = NULL;
1762 /* Write list before version */
1763 smp_wmb();
1764 ram_list.version++;
1765 call_rcu(block, reclaim_ramblock, rcu);
1766 qemu_mutex_unlock_ramlist();
1769 #ifndef _WIN32
1770 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1772 RAMBlock *block;
1773 ram_addr_t offset;
1774 int flags;
1775 void *area, *vaddr;
1777 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1778 offset = addr - block->offset;
1779 if (offset < block->max_length) {
1780 vaddr = ramblock_ptr(block, offset);
1781 if (block->flags & RAM_PREALLOC) {
1783 } else if (xen_enabled()) {
1784 abort();
1785 } else {
1786 flags = MAP_FIXED;
1787 if (block->fd >= 0) {
1788 flags |= (block->flags & RAM_SHARED ?
1789 MAP_SHARED : MAP_PRIVATE);
1790 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1791 flags, block->fd, offset);
1792 } else {
1794 * Remap needs to match alloc. Accelerators that
1795 * set phys_mem_alloc never remap. If they did,
1796 * we'd need a remap hook here.
1798 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1800 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1801 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1802 flags, -1, 0);
1804 if (area != vaddr) {
1805 fprintf(stderr, "Could not remap addr: "
1806 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1807 length, addr);
1808 exit(1);
1810 memory_try_enable_merging(vaddr, length);
1811 qemu_ram_setup_dump(vaddr, length);
1816 #endif /* !_WIN32 */
1818 int qemu_get_ram_fd(ram_addr_t addr)
1820 RAMBlock *block;
1821 int fd;
1823 rcu_read_lock();
1824 block = qemu_get_ram_block(addr);
1825 fd = block->fd;
1826 rcu_read_unlock();
1827 return fd;
1830 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1832 RAMBlock *block;
1834 rcu_read_lock();
1835 block = qemu_get_ram_block(addr);
1836 block->fd = fd;
1837 rcu_read_unlock();
1840 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1842 RAMBlock *block;
1843 void *ptr;
1845 rcu_read_lock();
1846 block = qemu_get_ram_block(addr);
1847 ptr = ramblock_ptr(block, 0);
1848 rcu_read_unlock();
1849 return ptr;
1852 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1853 * This should not be used for general purpose DMA. Use address_space_map
1854 * or address_space_rw instead. For local memory (e.g. video ram) that the
1855 * device owns, use memory_region_get_ram_ptr.
1857 * Called within RCU critical section.
1859 void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1861 RAMBlock *block = ram_block;
1863 if (block == NULL) {
1864 block = qemu_get_ram_block(addr);
1867 if (xen_enabled() && block->host == NULL) {
1868 /* We need to check if the requested address is in the RAM
1869 * because we don't want to map the entire memory in QEMU.
1870 * In that case just map until the end of the page.
1872 if (block->offset == 0) {
1873 return xen_map_cache(addr, 0, 0);
1876 block->host = xen_map_cache(block->offset, block->max_length, 1);
1878 return ramblock_ptr(block, addr - block->offset);
1881 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1882 * but takes a size argument.
1884 * Called within RCU critical section.
1886 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1887 hwaddr *size)
1889 RAMBlock *block = ram_block;
1890 ram_addr_t offset_inside_block;
1891 if (*size == 0) {
1892 return NULL;
1895 if (block == NULL) {
1896 block = qemu_get_ram_block(addr);
1898 offset_inside_block = addr - block->offset;
1899 *size = MIN(*size, block->max_length - offset_inside_block);
1901 if (xen_enabled() && block->host == NULL) {
1902 /* We need to check if the requested address is in the RAM
1903 * because we don't want to map the entire memory in QEMU.
1904 * In that case just map the requested area.
1906 if (block->offset == 0) {
1907 return xen_map_cache(addr, *size, 1);
1910 block->host = xen_map_cache(block->offset, block->max_length, 1);
1913 return ramblock_ptr(block, offset_inside_block);
1917 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1918 * in that RAMBlock.
1920 * ptr: Host pointer to look up
1921 * round_offset: If true round the result offset down to a page boundary
1922 * *ram_addr: set to result ram_addr
1923 * *offset: set to result offset within the RAMBlock
1925 * Returns: RAMBlock (or NULL if not found)
1927 * By the time this function returns, the returned pointer is not protected
1928 * by RCU anymore. If the caller is not within an RCU critical section and
1929 * does not hold the iothread lock, it must have other means of protecting the
1930 * pointer, such as a reference to the region that includes the incoming
1931 * ram_addr_t.
1933 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1934 ram_addr_t *ram_addr,
1935 ram_addr_t *offset)
1937 RAMBlock *block;
1938 uint8_t *host = ptr;
1940 if (xen_enabled()) {
1941 rcu_read_lock();
1942 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1943 block = qemu_get_ram_block(*ram_addr);
1944 if (block) {
1945 *offset = (host - block->host);
1947 rcu_read_unlock();
1948 return block;
1951 rcu_read_lock();
1952 block = atomic_rcu_read(&ram_list.mru_block);
1953 if (block && block->host && host - block->host < block->max_length) {
1954 goto found;
1957 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1958 /* This case append when the block is not mapped. */
1959 if (block->host == NULL) {
1960 continue;
1962 if (host - block->host < block->max_length) {
1963 goto found;
1967 rcu_read_unlock();
1968 return NULL;
1970 found:
1971 *offset = (host - block->host);
1972 if (round_offset) {
1973 *offset &= TARGET_PAGE_MASK;
1975 *ram_addr = block->offset + *offset;
1976 rcu_read_unlock();
1977 return block;
1981 * Finds the named RAMBlock
1983 * name: The name of RAMBlock to find
1985 * Returns: RAMBlock (or NULL if not found)
1987 RAMBlock *qemu_ram_block_by_name(const char *name)
1989 RAMBlock *block;
1991 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1992 if (!strcmp(name, block->idstr)) {
1993 return block;
1997 return NULL;
2000 /* Some of the softmmu routines need to translate from a host pointer
2001 (typically a TLB entry) back to a ram offset. */
2002 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2004 RAMBlock *block;
2005 ram_addr_t offset; /* Not used */
2007 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2009 if (!block) {
2010 return NULL;
2013 return block->mr;
2016 /* Called within RCU critical section. */
2017 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2018 uint64_t val, unsigned size)
2020 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2021 tb_invalidate_phys_page_fast(ram_addr, size);
2023 switch (size) {
2024 case 1:
2025 stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2026 break;
2027 case 2:
2028 stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2029 break;
2030 case 4:
2031 stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2032 break;
2033 default:
2034 abort();
2036 /* Set both VGA and migration bits for simplicity and to remove
2037 * the notdirty callback faster.
2039 cpu_physical_memory_set_dirty_range(ram_addr, size,
2040 DIRTY_CLIENTS_NOCODE);
2041 /* we remove the notdirty callback only if the code has been
2042 flushed */
2043 if (!cpu_physical_memory_is_clean(ram_addr)) {
2044 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2048 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2049 unsigned size, bool is_write)
2051 return is_write;
2054 static const MemoryRegionOps notdirty_mem_ops = {
2055 .write = notdirty_mem_write,
2056 .valid.accepts = notdirty_mem_accepts,
2057 .endianness = DEVICE_NATIVE_ENDIAN,
2060 /* Generate a debug exception if a watchpoint has been hit. */
2061 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2063 CPUState *cpu = current_cpu;
2064 CPUClass *cc = CPU_GET_CLASS(cpu);
2065 CPUArchState *env = cpu->env_ptr;
2066 target_ulong pc, cs_base;
2067 target_ulong vaddr;
2068 CPUWatchpoint *wp;
2069 uint32_t cpu_flags;
2071 if (cpu->watchpoint_hit) {
2072 /* We re-entered the check after replacing the TB. Now raise
2073 * the debug interrupt so that is will trigger after the
2074 * current instruction. */
2075 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2076 return;
2078 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2079 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2080 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2081 && (wp->flags & flags)) {
2082 if (flags == BP_MEM_READ) {
2083 wp->flags |= BP_WATCHPOINT_HIT_READ;
2084 } else {
2085 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2087 wp->hitaddr = vaddr;
2088 wp->hitattrs = attrs;
2089 if (!cpu->watchpoint_hit) {
2090 if (wp->flags & BP_CPU &&
2091 !cc->debug_check_watchpoint(cpu, wp)) {
2092 wp->flags &= ~BP_WATCHPOINT_HIT;
2093 continue;
2095 cpu->watchpoint_hit = wp;
2096 tb_check_watchpoint(cpu);
2097 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2098 cpu->exception_index = EXCP_DEBUG;
2099 cpu_loop_exit(cpu);
2100 } else {
2101 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2102 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2103 cpu_resume_from_signal(cpu, NULL);
2106 } else {
2107 wp->flags &= ~BP_WATCHPOINT_HIT;
2112 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2113 so these check for a hit then pass through to the normal out-of-line
2114 phys routines. */
2115 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2116 unsigned size, MemTxAttrs attrs)
2118 MemTxResult res;
2119 uint64_t data;
2120 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2121 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2123 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2124 switch (size) {
2125 case 1:
2126 data = address_space_ldub(as, addr, attrs, &res);
2127 break;
2128 case 2:
2129 data = address_space_lduw(as, addr, attrs, &res);
2130 break;
2131 case 4:
2132 data = address_space_ldl(as, addr, attrs, &res);
2133 break;
2134 default: abort();
2136 *pdata = data;
2137 return res;
2140 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2141 uint64_t val, unsigned size,
2142 MemTxAttrs attrs)
2144 MemTxResult res;
2145 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2146 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2148 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2149 switch (size) {
2150 case 1:
2151 address_space_stb(as, addr, val, attrs, &res);
2152 break;
2153 case 2:
2154 address_space_stw(as, addr, val, attrs, &res);
2155 break;
2156 case 4:
2157 address_space_stl(as, addr, val, attrs, &res);
2158 break;
2159 default: abort();
2161 return res;
2164 static const MemoryRegionOps watch_mem_ops = {
2165 .read_with_attrs = watch_mem_read,
2166 .write_with_attrs = watch_mem_write,
2167 .endianness = DEVICE_NATIVE_ENDIAN,
2170 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2171 unsigned len, MemTxAttrs attrs)
2173 subpage_t *subpage = opaque;
2174 uint8_t buf[8];
2175 MemTxResult res;
2177 #if defined(DEBUG_SUBPAGE)
2178 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2179 subpage, len, addr);
2180 #endif
2181 res = address_space_read(subpage->as, addr + subpage->base,
2182 attrs, buf, len);
2183 if (res) {
2184 return res;
2186 switch (len) {
2187 case 1:
2188 *data = ldub_p(buf);
2189 return MEMTX_OK;
2190 case 2:
2191 *data = lduw_p(buf);
2192 return MEMTX_OK;
2193 case 4:
2194 *data = ldl_p(buf);
2195 return MEMTX_OK;
2196 case 8:
2197 *data = ldq_p(buf);
2198 return MEMTX_OK;
2199 default:
2200 abort();
2204 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2205 uint64_t value, unsigned len, MemTxAttrs attrs)
2207 subpage_t *subpage = opaque;
2208 uint8_t buf[8];
2210 #if defined(DEBUG_SUBPAGE)
2211 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2212 " value %"PRIx64"\n",
2213 __func__, subpage, len, addr, value);
2214 #endif
2215 switch (len) {
2216 case 1:
2217 stb_p(buf, value);
2218 break;
2219 case 2:
2220 stw_p(buf, value);
2221 break;
2222 case 4:
2223 stl_p(buf, value);
2224 break;
2225 case 8:
2226 stq_p(buf, value);
2227 break;
2228 default:
2229 abort();
2231 return address_space_write(subpage->as, addr + subpage->base,
2232 attrs, buf, len);
2235 static bool subpage_accepts(void *opaque, hwaddr addr,
2236 unsigned len, bool is_write)
2238 subpage_t *subpage = opaque;
2239 #if defined(DEBUG_SUBPAGE)
2240 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2241 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2242 #endif
2244 return address_space_access_valid(subpage->as, addr + subpage->base,
2245 len, is_write);
2248 static const MemoryRegionOps subpage_ops = {
2249 .read_with_attrs = subpage_read,
2250 .write_with_attrs = subpage_write,
2251 .impl.min_access_size = 1,
2252 .impl.max_access_size = 8,
2253 .valid.min_access_size = 1,
2254 .valid.max_access_size = 8,
2255 .valid.accepts = subpage_accepts,
2256 .endianness = DEVICE_NATIVE_ENDIAN,
2259 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2260 uint16_t section)
2262 int idx, eidx;
2264 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2265 return -1;
2266 idx = SUBPAGE_IDX(start);
2267 eidx = SUBPAGE_IDX(end);
2268 #if defined(DEBUG_SUBPAGE)
2269 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2270 __func__, mmio, start, end, idx, eidx, section);
2271 #endif
2272 for (; idx <= eidx; idx++) {
2273 mmio->sub_section[idx] = section;
2276 return 0;
2279 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2281 subpage_t *mmio;
2283 mmio = g_malloc0(sizeof(subpage_t));
2285 mmio->as = as;
2286 mmio->base = base;
2287 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2288 NULL, TARGET_PAGE_SIZE);
2289 mmio->iomem.subpage = true;
2290 #if defined(DEBUG_SUBPAGE)
2291 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2292 mmio, base, TARGET_PAGE_SIZE);
2293 #endif
2294 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2296 return mmio;
2299 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2300 MemoryRegion *mr)
2302 assert(as);
2303 MemoryRegionSection section = {
2304 .address_space = as,
2305 .mr = mr,
2306 .offset_within_address_space = 0,
2307 .offset_within_region = 0,
2308 .size = int128_2_64(),
2311 return phys_section_add(map, &section);
2314 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2316 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2317 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2318 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2319 MemoryRegionSection *sections = d->map.sections;
2321 return sections[index & ~TARGET_PAGE_MASK].mr;
2324 static void io_mem_init(void)
2326 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2327 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2328 NULL, UINT64_MAX);
2329 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2330 NULL, UINT64_MAX);
2331 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2332 NULL, UINT64_MAX);
2335 static void mem_begin(MemoryListener *listener)
2337 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2338 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2339 uint16_t n;
2341 n = dummy_section(&d->map, as, &io_mem_unassigned);
2342 assert(n == PHYS_SECTION_UNASSIGNED);
2343 n = dummy_section(&d->map, as, &io_mem_notdirty);
2344 assert(n == PHYS_SECTION_NOTDIRTY);
2345 n = dummy_section(&d->map, as, &io_mem_rom);
2346 assert(n == PHYS_SECTION_ROM);
2347 n = dummy_section(&d->map, as, &io_mem_watch);
2348 assert(n == PHYS_SECTION_WATCH);
2350 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2351 d->as = as;
2352 as->next_dispatch = d;
2355 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2357 phys_sections_free(&d->map);
2358 g_free(d);
2361 static void mem_commit(MemoryListener *listener)
2363 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2364 AddressSpaceDispatch *cur = as->dispatch;
2365 AddressSpaceDispatch *next = as->next_dispatch;
2367 phys_page_compact_all(next, next->map.nodes_nb);
2369 atomic_rcu_set(&as->dispatch, next);
2370 if (cur) {
2371 call_rcu(cur, address_space_dispatch_free, rcu);
2375 static void tcg_commit(MemoryListener *listener)
2377 CPUAddressSpace *cpuas;
2378 AddressSpaceDispatch *d;
2380 /* since each CPU stores ram addresses in its TLB cache, we must
2381 reset the modified entries */
2382 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2383 cpu_reloading_memory_map();
2384 /* The CPU and TLB are protected by the iothread lock.
2385 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2386 * may have split the RCU critical section.
2388 d = atomic_rcu_read(&cpuas->as->dispatch);
2389 cpuas->memory_dispatch = d;
2390 tlb_flush(cpuas->cpu, 1);
2393 void address_space_init_dispatch(AddressSpace *as)
2395 as->dispatch = NULL;
2396 as->dispatch_listener = (MemoryListener) {
2397 .begin = mem_begin,
2398 .commit = mem_commit,
2399 .region_add = mem_add,
2400 .region_nop = mem_add,
2401 .priority = 0,
2403 memory_listener_register(&as->dispatch_listener, as);
2406 void address_space_unregister(AddressSpace *as)
2408 memory_listener_unregister(&as->dispatch_listener);
2411 void address_space_destroy_dispatch(AddressSpace *as)
2413 AddressSpaceDispatch *d = as->dispatch;
2415 atomic_rcu_set(&as->dispatch, NULL);
2416 if (d) {
2417 call_rcu(d, address_space_dispatch_free, rcu);
2421 static void memory_map_init(void)
2423 system_memory = g_malloc(sizeof(*system_memory));
2425 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2426 address_space_init(&address_space_memory, system_memory, "memory");
2428 system_io = g_malloc(sizeof(*system_io));
2429 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2430 65536);
2431 address_space_init(&address_space_io, system_io, "I/O");
2434 MemoryRegion *get_system_memory(void)
2436 return system_memory;
2439 MemoryRegion *get_system_io(void)
2441 return system_io;
2444 #endif /* !defined(CONFIG_USER_ONLY) */
2446 /* physical memory access (slow version, mainly for debug) */
2447 #if defined(CONFIG_USER_ONLY)
2448 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2449 uint8_t *buf, int len, int is_write)
2451 int l, flags;
2452 target_ulong page;
2453 void * p;
2455 while (len > 0) {
2456 page = addr & TARGET_PAGE_MASK;
2457 l = (page + TARGET_PAGE_SIZE) - addr;
2458 if (l > len)
2459 l = len;
2460 flags = page_get_flags(page);
2461 if (!(flags & PAGE_VALID))
2462 return -1;
2463 if (is_write) {
2464 if (!(flags & PAGE_WRITE))
2465 return -1;
2466 /* XXX: this code should not depend on lock_user */
2467 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2468 return -1;
2469 memcpy(p, buf, l);
2470 unlock_user(p, addr, l);
2471 } else {
2472 if (!(flags & PAGE_READ))
2473 return -1;
2474 /* XXX: this code should not depend on lock_user */
2475 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2476 return -1;
2477 memcpy(buf, p, l);
2478 unlock_user(p, addr, 0);
2480 len -= l;
2481 buf += l;
2482 addr += l;
2484 return 0;
2487 #else
2489 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2490 hwaddr length)
2492 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2493 /* No early return if dirty_log_mask is or becomes 0, because
2494 * cpu_physical_memory_set_dirty_range will still call
2495 * xen_modified_memory.
2497 if (dirty_log_mask) {
2498 dirty_log_mask =
2499 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2501 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2502 tb_invalidate_phys_range(addr, addr + length);
2503 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2505 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2508 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2510 unsigned access_size_max = mr->ops->valid.max_access_size;
2512 /* Regions are assumed to support 1-4 byte accesses unless
2513 otherwise specified. */
2514 if (access_size_max == 0) {
2515 access_size_max = 4;
2518 /* Bound the maximum access by the alignment of the address. */
2519 if (!mr->ops->impl.unaligned) {
2520 unsigned align_size_max = addr & -addr;
2521 if (align_size_max != 0 && align_size_max < access_size_max) {
2522 access_size_max = align_size_max;
2526 /* Don't attempt accesses larger than the maximum. */
2527 if (l > access_size_max) {
2528 l = access_size_max;
2530 l = pow2floor(l);
2532 return l;
2535 static bool prepare_mmio_access(MemoryRegion *mr)
2537 bool unlocked = !qemu_mutex_iothread_locked();
2538 bool release_lock = false;
2540 if (unlocked && mr->global_locking) {
2541 qemu_mutex_lock_iothread();
2542 unlocked = false;
2543 release_lock = true;
2545 if (mr->flush_coalesced_mmio) {
2546 if (unlocked) {
2547 qemu_mutex_lock_iothread();
2549 qemu_flush_coalesced_mmio_buffer();
2550 if (unlocked) {
2551 qemu_mutex_unlock_iothread();
2555 return release_lock;
2558 /* Called within RCU critical section. */
2559 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2560 MemTxAttrs attrs,
2561 const uint8_t *buf,
2562 int len, hwaddr addr1,
2563 hwaddr l, MemoryRegion *mr)
2565 uint8_t *ptr;
2566 uint64_t val;
2567 MemTxResult result = MEMTX_OK;
2568 bool release_lock = false;
2570 for (;;) {
2571 if (!memory_access_is_direct(mr, true)) {
2572 release_lock |= prepare_mmio_access(mr);
2573 l = memory_access_size(mr, l, addr1);
2574 /* XXX: could force current_cpu to NULL to avoid
2575 potential bugs */
2576 switch (l) {
2577 case 8:
2578 /* 64 bit write access */
2579 val = ldq_p(buf);
2580 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2581 attrs);
2582 break;
2583 case 4:
2584 /* 32 bit write access */
2585 val = ldl_p(buf);
2586 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2587 attrs);
2588 break;
2589 case 2:
2590 /* 16 bit write access */
2591 val = lduw_p(buf);
2592 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2593 attrs);
2594 break;
2595 case 1:
2596 /* 8 bit write access */
2597 val = ldub_p(buf);
2598 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2599 attrs);
2600 break;
2601 default:
2602 abort();
2604 } else {
2605 addr1 += memory_region_get_ram_addr(mr);
2606 /* RAM case */
2607 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2608 memcpy(ptr, buf, l);
2609 invalidate_and_set_dirty(mr, addr1, l);
2612 if (release_lock) {
2613 qemu_mutex_unlock_iothread();
2614 release_lock = false;
2617 len -= l;
2618 buf += l;
2619 addr += l;
2621 if (!len) {
2622 break;
2625 l = len;
2626 mr = address_space_translate(as, addr, &addr1, &l, true);
2629 return result;
2632 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2633 const uint8_t *buf, int len)
2635 hwaddr l;
2636 hwaddr addr1;
2637 MemoryRegion *mr;
2638 MemTxResult result = MEMTX_OK;
2640 if (len > 0) {
2641 rcu_read_lock();
2642 l = len;
2643 mr = address_space_translate(as, addr, &addr1, &l, true);
2644 result = address_space_write_continue(as, addr, attrs, buf, len,
2645 addr1, l, mr);
2646 rcu_read_unlock();
2649 return result;
2652 /* Called within RCU critical section. */
2653 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2654 MemTxAttrs attrs, uint8_t *buf,
2655 int len, hwaddr addr1, hwaddr l,
2656 MemoryRegion *mr)
2658 uint8_t *ptr;
2659 uint64_t val;
2660 MemTxResult result = MEMTX_OK;
2661 bool release_lock = false;
2663 for (;;) {
2664 if (!memory_access_is_direct(mr, false)) {
2665 /* I/O case */
2666 release_lock |= prepare_mmio_access(mr);
2667 l = memory_access_size(mr, l, addr1);
2668 switch (l) {
2669 case 8:
2670 /* 64 bit read access */
2671 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2672 attrs);
2673 stq_p(buf, val);
2674 break;
2675 case 4:
2676 /* 32 bit read access */
2677 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2678 attrs);
2679 stl_p(buf, val);
2680 break;
2681 case 2:
2682 /* 16 bit read access */
2683 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2684 attrs);
2685 stw_p(buf, val);
2686 break;
2687 case 1:
2688 /* 8 bit read access */
2689 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2690 attrs);
2691 stb_p(buf, val);
2692 break;
2693 default:
2694 abort();
2696 } else {
2697 /* RAM case */
2698 ptr = qemu_get_ram_ptr(mr->ram_block,
2699 memory_region_get_ram_addr(mr) + addr1);
2700 memcpy(buf, ptr, l);
2703 if (release_lock) {
2704 qemu_mutex_unlock_iothread();
2705 release_lock = false;
2708 len -= l;
2709 buf += l;
2710 addr += l;
2712 if (!len) {
2713 break;
2716 l = len;
2717 mr = address_space_translate(as, addr, &addr1, &l, false);
2720 return result;
2723 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2724 MemTxAttrs attrs, uint8_t *buf, int len)
2726 hwaddr l;
2727 hwaddr addr1;
2728 MemoryRegion *mr;
2729 MemTxResult result = MEMTX_OK;
2731 if (len > 0) {
2732 rcu_read_lock();
2733 l = len;
2734 mr = address_space_translate(as, addr, &addr1, &l, false);
2735 result = address_space_read_continue(as, addr, attrs, buf, len,
2736 addr1, l, mr);
2737 rcu_read_unlock();
2740 return result;
2743 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2744 uint8_t *buf, int len, bool is_write)
2746 if (is_write) {
2747 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2748 } else {
2749 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2753 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2754 int len, int is_write)
2756 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2757 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host icache for the range */
};
2765 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2766 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2768 hwaddr l;
2769 uint8_t *ptr;
2770 hwaddr addr1;
2771 MemoryRegion *mr;
2773 rcu_read_lock();
2774 while (len > 0) {
2775 l = len;
2776 mr = address_space_translate(as, addr, &addr1, &l, true);
2778 if (!(memory_region_is_ram(mr) ||
2779 memory_region_is_romd(mr))) {
2780 l = memory_access_size(mr, l, addr1);
2781 } else {
2782 addr1 += memory_region_get_ram_addr(mr);
2783 /* ROM/RAM case */
2784 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2785 switch (type) {
2786 case WRITE_DATA:
2787 memcpy(ptr, buf, l);
2788 invalidate_and_set_dirty(mr, addr1, l);
2789 break;
2790 case FLUSH_CACHE:
2791 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2792 break;
2795 len -= l;
2796 buf += l;
2797 addr += l;
2799 rcu_read_unlock();
2802 /* used for ROM loading : can write in RAM and ROM */
2803 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2804 const uint8_t *buf, int len)
2806 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2809 void cpu_flush_icache_range(hwaddr start, int len)
2812 * This function should do the same thing as an icache flush that was
2813 * triggered from within the guest. For TCG we are always cache coherent,
2814 * so there is no need to flush anything. For KVM / Xen we need to flush
2815 * the host's instruction cache at least.
2817 if (tcg_enabled()) {
2818 return;
2821 cpu_physical_memory_write_rom_internal(&address_space_memory,
2822 start, NULL, len, FLUSH_CACHE);
2825 typedef struct {
2826 MemoryRegion *mr;
2827 void *buffer;
2828 hwaddr addr;
2829 hwaddr len;
2830 bool in_use;
2831 } BounceBuffer;
2833 static BounceBuffer bounce;
2835 typedef struct MapClient {
2836 QEMUBH *bh;
2837 QLIST_ENTRY(MapClient) link;
2838 } MapClient;
2840 QemuMutex map_client_list_lock;
2841 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2842 = QLIST_HEAD_INITIALIZER(map_client_list);
2844 static void cpu_unregister_map_client_do(MapClient *client)
2846 QLIST_REMOVE(client, link);
2847 g_free(client);
2850 static void cpu_notify_map_clients_locked(void)
2852 MapClient *client;
2854 while (!QLIST_EMPTY(&map_client_list)) {
2855 client = QLIST_FIRST(&map_client_list);
2856 qemu_bh_schedule(client->bh);
2857 cpu_unregister_map_client_do(client);
2861 void cpu_register_map_client(QEMUBH *bh)
2863 MapClient *client = g_malloc(sizeof(*client));
2865 qemu_mutex_lock(&map_client_list_lock);
2866 client->bh = bh;
2867 QLIST_INSERT_HEAD(&map_client_list, client, link);
2868 if (!atomic_read(&bounce.in_use)) {
2869 cpu_notify_map_clients_locked();
2871 qemu_mutex_unlock(&map_client_list_lock);
2874 void cpu_exec_init_all(void)
2876 qemu_mutex_init(&ram_list.mutex);
2877 io_mem_init();
2878 memory_map_init();
2879 qemu_mutex_init(&map_client_list_lock);
2882 void cpu_unregister_map_client(QEMUBH *bh)
2884 MapClient *client;
2886 qemu_mutex_lock(&map_client_list_lock);
2887 QLIST_FOREACH(client, &map_client_list, link) {
2888 if (client->bh == bh) {
2889 cpu_unregister_map_client_do(client);
2890 break;
2893 qemu_mutex_unlock(&map_client_list_lock);
2896 static void cpu_notify_map_clients(void)
2898 qemu_mutex_lock(&map_client_list_lock);
2899 cpu_notify_map_clients_locked();
2900 qemu_mutex_unlock(&map_client_list_lock);
2903 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2905 MemoryRegion *mr;
2906 hwaddr l, xlat;
2908 rcu_read_lock();
2909 while (len > 0) {
2910 l = len;
2911 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2912 if (!memory_access_is_direct(mr, is_write)) {
2913 l = memory_access_size(mr, l, addr);
2914 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2915 return false;
2919 len -= l;
2920 addr += l;
2922 rcu_read_unlock();
2923 return true;
2926 /* Map a physical memory region into a host virtual address.
2927 * May map a subset of the requested range, given by and returned in *plen.
2928 * May return NULL if resources needed to perform the mapping are exhausted.
2929 * Use only for reads OR writes - not for read-modify-write operations.
2930 * Use cpu_register_map_client() to know when retrying the map operation is
2931 * likely to succeed.
2933 void *address_space_map(AddressSpace *as,
2934 hwaddr addr,
2935 hwaddr *plen,
2936 bool is_write)
2938 hwaddr len = *plen;
2939 hwaddr done = 0;
2940 hwaddr l, xlat, base;
2941 MemoryRegion *mr, *this_mr;
2942 ram_addr_t raddr;
2943 void *ptr;
2945 if (len == 0) {
2946 return NULL;
2949 l = len;
2950 rcu_read_lock();
2951 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2953 if (!memory_access_is_direct(mr, is_write)) {
2954 if (atomic_xchg(&bounce.in_use, true)) {
2955 rcu_read_unlock();
2956 return NULL;
2958 /* Avoid unbounded allocations */
2959 l = MIN(l, TARGET_PAGE_SIZE);
2960 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2961 bounce.addr = addr;
2962 bounce.len = l;
2964 memory_region_ref(mr);
2965 bounce.mr = mr;
2966 if (!is_write) {
2967 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2968 bounce.buffer, l);
2971 rcu_read_unlock();
2972 *plen = l;
2973 return bounce.buffer;
2976 base = xlat;
2977 raddr = memory_region_get_ram_addr(mr);
2979 for (;;) {
2980 len -= l;
2981 addr += l;
2982 done += l;
2983 if (len == 0) {
2984 break;
2987 l = len;
2988 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2989 if (this_mr != mr || xlat != base + done) {
2990 break;
2994 memory_region_ref(mr);
2995 *plen = done;
2996 ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
2997 rcu_read_unlock();
2999 return ptr;
3002 /* Unmaps a memory region previously mapped by address_space_map().
3003 * Will also mark the memory as dirty if is_write == 1. access_len gives
3004 * the amount of memory that was actually read or written by the caller.
3006 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3007 int is_write, hwaddr access_len)
3009 if (buffer != bounce.buffer) {
3010 MemoryRegion *mr;
3011 ram_addr_t addr1;
3013 mr = qemu_ram_addr_from_host(buffer, &addr1);
3014 assert(mr != NULL);
3015 if (is_write) {
3016 invalidate_and_set_dirty(mr, addr1, access_len);
3018 if (xen_enabled()) {
3019 xen_invalidate_map_cache_entry(buffer);
3021 memory_region_unref(mr);
3022 return;
3024 if (is_write) {
3025 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3026 bounce.buffer, access_len);
3028 qemu_vfree(bounce.buffer);
3029 bounce.buffer = NULL;
3030 memory_region_unref(bounce.mr);
3031 atomic_mb_set(&bounce.in_use, false);
3032 cpu_notify_map_clients();
3035 void *cpu_physical_memory_map(hwaddr addr,
3036 hwaddr *plen,
3037 int is_write)
3039 return address_space_map(&address_space_memory, addr, plen, is_write);
3042 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3043 int is_write, hwaddr access_len)
3045 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3048 /* warning: addr must be aligned */
3049 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3050 MemTxAttrs attrs,
3051 MemTxResult *result,
3052 enum device_endian endian)
3054 uint8_t *ptr;
3055 uint64_t val;
3056 MemoryRegion *mr;
3057 hwaddr l = 4;
3058 hwaddr addr1;
3059 MemTxResult r;
3060 bool release_lock = false;
3062 rcu_read_lock();
3063 mr = address_space_translate(as, addr, &addr1, &l, false);
3064 if (l < 4 || !memory_access_is_direct(mr, false)) {
3065 release_lock |= prepare_mmio_access(mr);
3067 /* I/O case */
3068 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3069 #if defined(TARGET_WORDS_BIGENDIAN)
3070 if (endian == DEVICE_LITTLE_ENDIAN) {
3071 val = bswap32(val);
3073 #else
3074 if (endian == DEVICE_BIG_ENDIAN) {
3075 val = bswap32(val);
3077 #endif
3078 } else {
3079 /* RAM case */
3080 ptr = qemu_get_ram_ptr(mr->ram_block,
3081 memory_region_get_ram_addr(mr) + addr1);
3082 switch (endian) {
3083 case DEVICE_LITTLE_ENDIAN:
3084 val = ldl_le_p(ptr);
3085 break;
3086 case DEVICE_BIG_ENDIAN:
3087 val = ldl_be_p(ptr);
3088 break;
3089 default:
3090 val = ldl_p(ptr);
3091 break;
3093 r = MEMTX_OK;
3095 if (result) {
3096 *result = r;
3098 if (release_lock) {
3099 qemu_mutex_unlock_iothread();
3101 rcu_read_unlock();
3102 return val;
3105 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3106 MemTxAttrs attrs, MemTxResult *result)
3108 return address_space_ldl_internal(as, addr, attrs, result,
3109 DEVICE_NATIVE_ENDIAN);
3112 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3113 MemTxAttrs attrs, MemTxResult *result)
3115 return address_space_ldl_internal(as, addr, attrs, result,
3116 DEVICE_LITTLE_ENDIAN);
3119 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3120 MemTxAttrs attrs, MemTxResult *result)
3122 return address_space_ldl_internal(as, addr, attrs, result,
3123 DEVICE_BIG_ENDIAN);
3126 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3128 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3131 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3133 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3136 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3138 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3141 /* warning: addr must be aligned */
3142 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3143 MemTxAttrs attrs,
3144 MemTxResult *result,
3145 enum device_endian endian)
3147 uint8_t *ptr;
3148 uint64_t val;
3149 MemoryRegion *mr;
3150 hwaddr l = 8;
3151 hwaddr addr1;
3152 MemTxResult r;
3153 bool release_lock = false;
3155 rcu_read_lock();
3156 mr = address_space_translate(as, addr, &addr1, &l,
3157 false);
3158 if (l < 8 || !memory_access_is_direct(mr, false)) {
3159 release_lock |= prepare_mmio_access(mr);
3161 /* I/O case */
3162 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3163 #if defined(TARGET_WORDS_BIGENDIAN)
3164 if (endian == DEVICE_LITTLE_ENDIAN) {
3165 val = bswap64(val);
3167 #else
3168 if (endian == DEVICE_BIG_ENDIAN) {
3169 val = bswap64(val);
3171 #endif
3172 } else {
3173 /* RAM case */
3174 ptr = qemu_get_ram_ptr(mr->ram_block,
3175 memory_region_get_ram_addr(mr) + addr1);
3176 switch (endian) {
3177 case DEVICE_LITTLE_ENDIAN:
3178 val = ldq_le_p(ptr);
3179 break;
3180 case DEVICE_BIG_ENDIAN:
3181 val = ldq_be_p(ptr);
3182 break;
3183 default:
3184 val = ldq_p(ptr);
3185 break;
3187 r = MEMTX_OK;
3189 if (result) {
3190 *result = r;
3192 if (release_lock) {
3193 qemu_mutex_unlock_iothread();
3195 rcu_read_unlock();
3196 return val;
3199 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3200 MemTxAttrs attrs, MemTxResult *result)
3202 return address_space_ldq_internal(as, addr, attrs, result,
3203 DEVICE_NATIVE_ENDIAN);
3206 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3207 MemTxAttrs attrs, MemTxResult *result)
3209 return address_space_ldq_internal(as, addr, attrs, result,
3210 DEVICE_LITTLE_ENDIAN);
3213 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3214 MemTxAttrs attrs, MemTxResult *result)
3216 return address_space_ldq_internal(as, addr, attrs, result,
3217 DEVICE_BIG_ENDIAN);
3220 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3222 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3225 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3227 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3230 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3232 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3235 /* XXX: optimize */
3236 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3237 MemTxAttrs attrs, MemTxResult *result)
3239 uint8_t val;
3240 MemTxResult r;
3242 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3243 if (result) {
3244 *result = r;
3246 return val;
3249 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3251 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3254 /* warning: addr must be aligned */
3255 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3256 hwaddr addr,
3257 MemTxAttrs attrs,
3258 MemTxResult *result,
3259 enum device_endian endian)
3261 uint8_t *ptr;
3262 uint64_t val;
3263 MemoryRegion *mr;
3264 hwaddr l = 2;
3265 hwaddr addr1;
3266 MemTxResult r;
3267 bool release_lock = false;
3269 rcu_read_lock();
3270 mr = address_space_translate(as, addr, &addr1, &l,
3271 false);
3272 if (l < 2 || !memory_access_is_direct(mr, false)) {
3273 release_lock |= prepare_mmio_access(mr);
3275 /* I/O case */
3276 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3277 #if defined(TARGET_WORDS_BIGENDIAN)
3278 if (endian == DEVICE_LITTLE_ENDIAN) {
3279 val = bswap16(val);
3281 #else
3282 if (endian == DEVICE_BIG_ENDIAN) {
3283 val = bswap16(val);
3285 #endif
3286 } else {
3287 /* RAM case */
3288 ptr = qemu_get_ram_ptr(mr->ram_block,
3289 memory_region_get_ram_addr(mr) + addr1);
3290 switch (endian) {
3291 case DEVICE_LITTLE_ENDIAN:
3292 val = lduw_le_p(ptr);
3293 break;
3294 case DEVICE_BIG_ENDIAN:
3295 val = lduw_be_p(ptr);
3296 break;
3297 default:
3298 val = lduw_p(ptr);
3299 break;
3301 r = MEMTX_OK;
3303 if (result) {
3304 *result = r;
3306 if (release_lock) {
3307 qemu_mutex_unlock_iothread();
3309 rcu_read_unlock();
3310 return val;
3313 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3314 MemTxAttrs attrs, MemTxResult *result)
3316 return address_space_lduw_internal(as, addr, attrs, result,
3317 DEVICE_NATIVE_ENDIAN);
3320 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3321 MemTxAttrs attrs, MemTxResult *result)
3323 return address_space_lduw_internal(as, addr, attrs, result,
3324 DEVICE_LITTLE_ENDIAN);
3327 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3328 MemTxAttrs attrs, MemTxResult *result)
3330 return address_space_lduw_internal(as, addr, attrs, result,
3331 DEVICE_BIG_ENDIAN);
3334 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3336 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3339 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3341 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3344 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3346 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3349 /* warning: addr must be aligned. The ram page is not masked as dirty
3350 and the code inside is not invalidated. It is useful if the dirty
3351 bits are used to track modified PTEs */
3352 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3353 MemTxAttrs attrs, MemTxResult *result)
3355 uint8_t *ptr;
3356 MemoryRegion *mr;
3357 hwaddr l = 4;
3358 hwaddr addr1;
3359 MemTxResult r;
3360 uint8_t dirty_log_mask;
3361 bool release_lock = false;
3363 rcu_read_lock();
3364 mr = address_space_translate(as, addr, &addr1, &l,
3365 true);
3366 if (l < 4 || !memory_access_is_direct(mr, true)) {
3367 release_lock |= prepare_mmio_access(mr);
3369 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3370 } else {
3371 addr1 += memory_region_get_ram_addr(mr);
3372 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3373 stl_p(ptr, val);
3375 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3376 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3377 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3378 r = MEMTX_OK;
3380 if (result) {
3381 *result = r;
3383 if (release_lock) {
3384 qemu_mutex_unlock_iothread();
3386 rcu_read_unlock();
3389 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3391 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3394 /* warning: addr must be aligned */
3395 static inline void address_space_stl_internal(AddressSpace *as,
3396 hwaddr addr, uint32_t val,
3397 MemTxAttrs attrs,
3398 MemTxResult *result,
3399 enum device_endian endian)
3401 uint8_t *ptr;
3402 MemoryRegion *mr;
3403 hwaddr l = 4;
3404 hwaddr addr1;
3405 MemTxResult r;
3406 bool release_lock = false;
3408 rcu_read_lock();
3409 mr = address_space_translate(as, addr, &addr1, &l,
3410 true);
3411 if (l < 4 || !memory_access_is_direct(mr, true)) {
3412 release_lock |= prepare_mmio_access(mr);
3414 #if defined(TARGET_WORDS_BIGENDIAN)
3415 if (endian == DEVICE_LITTLE_ENDIAN) {
3416 val = bswap32(val);
3418 #else
3419 if (endian == DEVICE_BIG_ENDIAN) {
3420 val = bswap32(val);
3422 #endif
3423 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3424 } else {
3425 /* RAM case */
3426 addr1 += memory_region_get_ram_addr(mr);
3427 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3428 switch (endian) {
3429 case DEVICE_LITTLE_ENDIAN:
3430 stl_le_p(ptr, val);
3431 break;
3432 case DEVICE_BIG_ENDIAN:
3433 stl_be_p(ptr, val);
3434 break;
3435 default:
3436 stl_p(ptr, val);
3437 break;
3439 invalidate_and_set_dirty(mr, addr1, 4);
3440 r = MEMTX_OK;
3442 if (result) {
3443 *result = r;
3445 if (release_lock) {
3446 qemu_mutex_unlock_iothread();
3448 rcu_read_unlock();
3451 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3452 MemTxAttrs attrs, MemTxResult *result)
3454 address_space_stl_internal(as, addr, val, attrs, result,
3455 DEVICE_NATIVE_ENDIAN);
3458 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3459 MemTxAttrs attrs, MemTxResult *result)
3461 address_space_stl_internal(as, addr, val, attrs, result,
3462 DEVICE_LITTLE_ENDIAN);
3465 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3466 MemTxAttrs attrs, MemTxResult *result)
3468 address_space_stl_internal(as, addr, val, attrs, result,
3469 DEVICE_BIG_ENDIAN);
3472 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3474 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3477 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3479 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3482 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3484 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3487 /* XXX: optimize */
3488 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3489 MemTxAttrs attrs, MemTxResult *result)
3491 uint8_t v = val;
3492 MemTxResult r;
3494 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3495 if (result) {
3496 *result = r;
3500 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3502 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3505 /* warning: addr must be aligned */
3506 static inline void address_space_stw_internal(AddressSpace *as,
3507 hwaddr addr, uint32_t val,
3508 MemTxAttrs attrs,
3509 MemTxResult *result,
3510 enum device_endian endian)
3512 uint8_t *ptr;
3513 MemoryRegion *mr;
3514 hwaddr l = 2;
3515 hwaddr addr1;
3516 MemTxResult r;
3517 bool release_lock = false;
3519 rcu_read_lock();
3520 mr = address_space_translate(as, addr, &addr1, &l, true);
3521 if (l < 2 || !memory_access_is_direct(mr, true)) {
3522 release_lock |= prepare_mmio_access(mr);
3524 #if defined(TARGET_WORDS_BIGENDIAN)
3525 if (endian == DEVICE_LITTLE_ENDIAN) {
3526 val = bswap16(val);
3528 #else
3529 if (endian == DEVICE_BIG_ENDIAN) {
3530 val = bswap16(val);
3532 #endif
3533 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3534 } else {
3535 /* RAM case */
3536 addr1 += memory_region_get_ram_addr(mr);
3537 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3538 switch (endian) {
3539 case DEVICE_LITTLE_ENDIAN:
3540 stw_le_p(ptr, val);
3541 break;
3542 case DEVICE_BIG_ENDIAN:
3543 stw_be_p(ptr, val);
3544 break;
3545 default:
3546 stw_p(ptr, val);
3547 break;
3549 invalidate_and_set_dirty(mr, addr1, 2);
3550 r = MEMTX_OK;
3552 if (result) {
3553 *result = r;
3555 if (release_lock) {
3556 qemu_mutex_unlock_iothread();
3558 rcu_read_unlock();
3561 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3562 MemTxAttrs attrs, MemTxResult *result)
3564 address_space_stw_internal(as, addr, val, attrs, result,
3565 DEVICE_NATIVE_ENDIAN);
3568 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3569 MemTxAttrs attrs, MemTxResult *result)
3571 address_space_stw_internal(as, addr, val, attrs, result,
3572 DEVICE_LITTLE_ENDIAN);
3575 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3576 MemTxAttrs attrs, MemTxResult *result)
3578 address_space_stw_internal(as, addr, val, attrs, result,
3579 DEVICE_BIG_ENDIAN);
3582 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3584 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3587 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3589 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3592 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3594 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3597 /* XXX: optimize */
3598 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3599 MemTxAttrs attrs, MemTxResult *result)
3601 MemTxResult r;
3602 val = tswap64(val);
3603 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3604 if (result) {
3605 *result = r;
3609 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3610 MemTxAttrs attrs, MemTxResult *result)
3612 MemTxResult r;
3613 val = cpu_to_le64(val);
3614 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3615 if (result) {
3616 *result = r;
3619 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3620 MemTxAttrs attrs, MemTxResult *result)
3622 MemTxResult r;
3623 val = cpu_to_be64(val);
3624 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3625 if (result) {
3626 *result = r;
3630 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3632 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3635 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3637 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3640 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3642 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3645 /* virtual memory access for debug (includes writing to ROM) */
3646 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3647 uint8_t *buf, int len, int is_write)
3649 int l;
3650 hwaddr phys_addr;
3651 target_ulong page;
3653 while (len > 0) {
3654 int asidx;
3655 MemTxAttrs attrs;
3657 page = addr & TARGET_PAGE_MASK;
3658 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3659 asidx = cpu_asidx_from_attrs(cpu, attrs);
3660 /* if no physical page mapped, return an error */
3661 if (phys_addr == -1)
3662 return -1;
3663 l = (page + TARGET_PAGE_SIZE) - addr;
3664 if (l > len)
3665 l = len;
3666 phys_addr += (addr & ~TARGET_PAGE_MASK);
3667 if (is_write) {
3668 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3669 phys_addr, buf, l);
3670 } else {
3671 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3672 MEMTXATTRS_UNSPECIFIED,
3673 buf, l, 0);
3675 len -= l;
3676 buf += l;
3677 addr += l;
3679 return 0;
3683 * Allows code that needs to deal with migration bitmaps etc to still be built
3684 * target independent.
3686 size_t qemu_target_page_bits(void)
3688 return TARGET_PAGE_BITS;
3691 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3707 #ifndef CONFIG_USER_ONLY
3708 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3710 MemoryRegion*mr;
3711 hwaddr l = 1;
3712 bool res;
3714 rcu_read_lock();
3715 mr = address_space_translate(&address_space_memory,
3716 phys_addr, &phys_addr, &l, false);
3718 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3719 rcu_read_unlock();
3720 return res;
3723 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3725 RAMBlock *block;
3726 int ret = 0;
3728 rcu_read_lock();
3729 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3730 ret = func(block->idstr, block->host, block->offset,
3731 block->used_length, opaque);
3732 if (ret) {
3733 break;
3736 rcu_read_unlock();
3737 return ret;
3739 #endif