kvm: Allow the Hyper-V vendor ID to be specified
[qemu/ar7.git] / exec.c
blobaad94a0ef73460955e6bd27b2df58848a987d1a1
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
57 #include "qemu/range.h"
59 //#define DEBUG_SUBPAGE
61 #if !defined(CONFIG_USER_ONLY)
62 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
63 * are protected by the ramlist lock.
65 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
67 static MemoryRegion *system_memory;
68 static MemoryRegion *system_io;
70 AddressSpace address_space_io;
71 AddressSpace address_space_memory;
73 MemoryRegion io_mem_rom, io_mem_notdirty;
74 static MemoryRegion io_mem_unassigned;
/* RAMBlock flag bits. */

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

/* An extra page is mapped on top of this RAM. */
#define RAM_EXTRA      (1 << 3)
90 #endif
92 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
93 /* current CPU in the current thread. It is only valid inside
94 cpu_exec() */
95 __thread CPUState *current_cpu;
96 /* 0 = Do not count executed instructions.
97 1 = Precise instruction counting.
98 2 = Adaptive rate instruction counting. */
99 int use_icount;
101 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the multi-level physical page map, packed into 32 bits. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

/* All-ones value of the 26-bit ptr field: "no node allocated here". */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
114 /* Size of the L2 (and L3, etc) page tables. */
115 #define ADDR_SPACE_BITS 64
117 #define P_L2_BITS 9
118 #define P_L2_SIZE (1 << P_L2_BITS)
120 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
122 typedef PhysPageEntry Node[P_L2_SIZE];
124 typedef struct PhysPageMap {
125 struct rcu_head rcu;
127 unsigned sections_nb;
128 unsigned sections_nb_alloc;
129 unsigned nodes_nb;
130 unsigned nodes_nb_alloc;
131 Node *nodes;
132 MemoryRegionSection *sections;
133 } PhysPageMap;
135 struct AddressSpaceDispatch {
136 struct rcu_head rcu;
138 /* This is a multi-level map on the physical address space.
139 * The bottom level has pointers to MemoryRegionSections.
141 PhysPageEntry phys_map;
142 PhysPageMap map;
143 AddressSpace *as;
146 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
147 typedef struct subpage_t {
148 MemoryRegion iomem;
149 AddressSpace *as;
150 hwaddr base;
151 uint16_t sub_section[TARGET_PAGE_SIZE];
152 } subpage_t;
154 #define PHYS_SECTION_UNASSIGNED 0
155 #define PHYS_SECTION_NOTDIRTY 1
156 #define PHYS_SECTION_ROM 2
157 #define PHYS_SECTION_WATCH 3
159 static void io_mem_init(void);
160 static void memory_map_init(void);
161 static void tcg_commit(MemoryListener *listener);
163 static MemoryRegion io_mem_watch;
166 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
167 * @cpu: the CPU whose AddressSpace this is
168 * @as: the AddressSpace itself
169 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
170 * @tcg_as_listener: listener for tracking changes to the AddressSpace
172 struct CPUAddressSpace {
173 CPUState *cpu;
174 AddressSpace *as;
175 struct AddressSpaceDispatch *memory_dispatch;
176 MemoryListener tcg_as_listener;
179 #endif
181 #if !defined(CONFIG_USER_ONLY)
183 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
185 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
186 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
187 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
188 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
192 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
194 unsigned i;
195 uint32_t ret;
196 PhysPageEntry e;
197 PhysPageEntry *p;
199 ret = map->nodes_nb++;
200 p = map->nodes[ret];
201 assert(ret != PHYS_MAP_NODE_NIL);
202 assert(ret != map->nodes_nb_alloc);
204 e.skip = leaf ? 0 : 1;
205 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
206 for (i = 0; i < P_L2_SIZE; ++i) {
207 memcpy(&p[i], &e, sizeof(e));
209 return ret;
212 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
213 hwaddr *index, hwaddr *nb, uint16_t leaf,
214 int level)
216 PhysPageEntry *p;
217 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
219 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
220 lp->ptr = phys_map_node_alloc(map, level == 0);
222 p = map->nodes[lp->ptr];
223 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
225 while (*nb && lp < &p[P_L2_SIZE]) {
226 if ((*index & (step - 1)) == 0 && *nb >= step) {
227 lp->skip = 0;
228 lp->ptr = leaf;
229 *index += step;
230 *nb -= step;
231 } else {
232 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
234 ++lp;
238 static void phys_page_set(AddressSpaceDispatch *d,
239 hwaddr index, hwaddr nb,
240 uint16_t leaf)
242 /* Wildly overreserve - it doesn't matter much. */
243 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
245 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
248 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
249 * and update our entry so we can skip it and go directly to the destination.
251 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
253 unsigned valid_ptr = P_L2_SIZE;
254 int valid = 0;
255 PhysPageEntry *p;
256 int i;
258 if (lp->ptr == PHYS_MAP_NODE_NIL) {
259 return;
262 p = nodes[lp->ptr];
263 for (i = 0; i < P_L2_SIZE; i++) {
264 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
265 continue;
268 valid_ptr = i;
269 valid++;
270 if (p[i].skip) {
271 phys_page_compact(&p[i], nodes, compacted);
275 /* We can only compress if there's only one child. */
276 if (valid != 1) {
277 return;
280 assert(valid_ptr < P_L2_SIZE);
282 /* Don't compress if it won't fit in the # of bits we have. */
283 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
284 return;
287 lp->ptr = p[valid_ptr].ptr;
288 if (!p[valid_ptr].skip) {
289 /* If our only child is a leaf, make this a leaf. */
290 /* By design, we should have made this node a leaf to begin with so we
291 * should never reach here.
292 * But since it's so simple to handle this, let's do it just in case we
293 * change this rule.
295 lp->skip = 0;
296 } else {
297 lp->skip += p[valid_ptr].skip;
301 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
303 DECLARE_BITMAP(compacted, nodes_nb);
305 if (d->phys_map.skip) {
306 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
310 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
311 Node *nodes, MemoryRegionSection *sections)
313 PhysPageEntry *p;
314 hwaddr index = addr >> TARGET_PAGE_BITS;
315 int i;
317 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
318 if (lp.ptr == PHYS_MAP_NODE_NIL) {
319 return &sections[PHYS_SECTION_UNASSIGNED];
321 p = nodes[lp.ptr];
322 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
325 if (sections[lp.ptr].size.hi ||
326 range_covers_byte(sections[lp.ptr].offset_within_address_space,
327 sections[lp.ptr].size.lo, addr)) {
328 return &sections[lp.ptr];
329 } else {
330 return &sections[PHYS_SECTION_UNASSIGNED];
334 bool memory_region_is_unassigned(MemoryRegion *mr)
336 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
337 && mr != &io_mem_watch;
340 /* Called from RCU critical section */
341 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
342 hwaddr addr,
343 bool resolve_subpage)
345 MemoryRegionSection *section;
346 subpage_t *subpage;
348 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
349 if (resolve_subpage && section->mr->subpage) {
350 subpage = container_of(section->mr, subpage_t, iomem);
351 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
353 return section;
356 /* Called from RCU critical section */
357 static MemoryRegionSection *
358 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
359 hwaddr *plen, bool resolve_subpage)
361 MemoryRegionSection *section;
362 MemoryRegion *mr;
363 Int128 diff;
365 section = address_space_lookup_region(d, addr, resolve_subpage);
366 /* Compute offset within MemoryRegionSection */
367 addr -= section->offset_within_address_space;
369 /* Compute offset within MemoryRegion */
370 *xlat = addr + section->offset_within_region;
372 mr = section->mr;
374 /* MMIO registers can be expected to perform full-width accesses based only
375 * on their address, without considering adjacent registers that could
376 * decode to completely different MemoryRegions. When such registers
377 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
378 * regions overlap wildly. For this reason we cannot clamp the accesses
379 * here.
381 * If the length is small (as is the case for address_space_ldl/stl),
382 * everything works fine. If the incoming length is large, however,
383 * the caller really has to do the clamping through memory_access_size.
385 if (memory_region_is_ram(mr)) {
386 diff = int128_sub(section->size, int128_make64(addr));
387 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
389 return section;
392 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
394 if (memory_region_is_ram(mr)) {
395 return !(is_write && mr->readonly);
397 if (memory_region_is_romd(mr)) {
398 return !is_write;
401 return false;
404 /* Called from RCU critical section */
405 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
406 hwaddr *xlat, hwaddr *plen,
407 bool is_write)
409 IOMMUTLBEntry iotlb;
410 MemoryRegionSection *section;
411 MemoryRegion *mr;
413 for (;;) {
414 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
415 section = address_space_translate_internal(d, addr, &addr, plen, true);
416 mr = section->mr;
418 if (!mr->iommu_ops) {
419 break;
422 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
423 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
424 | (addr & iotlb.addr_mask));
425 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
426 if (!(iotlb.perm & (1 << is_write))) {
427 mr = &io_mem_unassigned;
428 break;
431 as = iotlb.target_as;
434 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
435 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
436 *plen = MIN(page, *plen);
439 *xlat = addr;
440 return mr;
443 /* Called from RCU critical section */
444 MemoryRegionSection *
445 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
446 hwaddr *xlat, hwaddr *plen)
448 MemoryRegionSection *section;
449 section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
450 addr, xlat, plen, false);
452 assert(!section->mr->iommu_ops);
453 return section;
455 #endif
457 #if !defined(CONFIG_USER_ONLY)
459 static int cpu_common_post_load(void *opaque, int version_id)
461 CPUState *cpu = opaque;
463 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
464 version_id is increased. */
465 cpu->interrupt_request &= ~0x01;
466 tlb_flush(cpu, 1);
468 return 0;
471 static int cpu_common_pre_load(void *opaque)
473 CPUState *cpu = opaque;
475 cpu->exception_index = -1;
477 return 0;
480 static bool cpu_common_exception_index_needed(void *opaque)
482 CPUState *cpu = opaque;
484 return tcg_enabled() && cpu->exception_index != -1;
487 static const VMStateDescription vmstate_cpu_common_exception_index = {
488 .name = "cpu_common/exception_index",
489 .version_id = 1,
490 .minimum_version_id = 1,
491 .needed = cpu_common_exception_index_needed,
492 .fields = (VMStateField[]) {
493 VMSTATE_INT32(exception_index, CPUState),
494 VMSTATE_END_OF_LIST()
498 static bool cpu_common_crash_occurred_needed(void *opaque)
500 CPUState *cpu = opaque;
502 return cpu->crash_occurred;
505 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
506 .name = "cpu_common/crash_occurred",
507 .version_id = 1,
508 .minimum_version_id = 1,
509 .needed = cpu_common_crash_occurred_needed,
510 .fields = (VMStateField[]) {
511 VMSTATE_BOOL(crash_occurred, CPUState),
512 VMSTATE_END_OF_LIST()
516 const VMStateDescription vmstate_cpu_common = {
517 .name = "cpu_common",
518 .version_id = 1,
519 .minimum_version_id = 1,
520 .pre_load = cpu_common_pre_load,
521 .post_load = cpu_common_post_load,
522 .fields = (VMStateField[]) {
523 VMSTATE_UINT32(halted, CPUState),
524 VMSTATE_UINT32(interrupt_request, CPUState),
525 VMSTATE_END_OF_LIST()
527 .subsections = (const VMStateDescription*[]) {
528 &vmstate_cpu_common_exception_index,
529 &vmstate_cpu_common_crash_occurred,
530 NULL
534 #endif
536 CPUState *qemu_get_cpu(int index)
538 CPUState *cpu;
540 CPU_FOREACH(cpu) {
541 if (cpu->cpu_index == index) {
542 return cpu;
546 return NULL;
549 #if !defined(CONFIG_USER_ONLY)
550 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
552 /* We only support one address space per cpu at the moment. */
553 assert(cpu->as == as);
555 if (cpu->cpu_ases) {
556 /* We've already registered the listener for our only AS */
557 return;
560 cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
561 cpu->cpu_ases[0].cpu = cpu;
562 cpu->cpu_ases[0].as = as;
563 cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
564 memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
566 #endif
568 #ifndef CONFIG_USER_ONLY
569 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
571 static int cpu_get_free_index(Error **errp)
573 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
575 if (cpu >= MAX_CPUMASK_BITS) {
576 error_setg(errp, "Trying to use more CPUs than max of %d",
577 MAX_CPUMASK_BITS);
578 return -1;
581 bitmap_set(cpu_index_map, cpu, 1);
582 return cpu;
585 void cpu_exec_exit(CPUState *cpu)
587 if (cpu->cpu_index == -1) {
588 /* cpu_index was never allocated by this @cpu or was already freed. */
589 return;
592 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
593 cpu->cpu_index = -1;
595 #else
597 static int cpu_get_free_index(Error **errp)
599 CPUState *some_cpu;
600 int cpu_index = 0;
602 CPU_FOREACH(some_cpu) {
603 cpu_index++;
605 return cpu_index;
608 void cpu_exec_exit(CPUState *cpu)
611 #endif
613 void cpu_exec_init(CPUState *cpu, Error **errp)
615 CPUClass *cc = CPU_GET_CLASS(cpu);
616 int cpu_index;
617 Error *local_err = NULL;
619 #ifndef CONFIG_USER_ONLY
620 cpu->as = &address_space_memory;
621 cpu->thread_id = qemu_get_thread_id();
622 #endif
624 #if defined(CONFIG_USER_ONLY)
625 cpu_list_lock();
626 #endif
627 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
628 if (local_err) {
629 error_propagate(errp, local_err);
630 #if defined(CONFIG_USER_ONLY)
631 cpu_list_unlock();
632 #endif
633 return;
635 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
636 #if defined(CONFIG_USER_ONLY)
637 cpu_list_unlock();
638 #endif
639 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
640 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
642 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
643 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
644 cpu_save, cpu_load, cpu->env_ptr);
645 assert(cc->vmsd == NULL);
646 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
647 #endif
648 if (cc->vmsd != NULL) {
649 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
653 #if defined(CONFIG_USER_ONLY)
654 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
656 tb_invalidate_phys_page_range(pc, pc + 1, 0);
658 #else
659 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
661 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
662 if (phys != -1) {
663 tb_invalidate_phys_addr(cpu->as,
664 phys | (pc & ~TARGET_PAGE_MASK));
667 #endif
669 #if defined(CONFIG_USER_ONLY)
670 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
675 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
676 int flags)
678 return -ENOSYS;
681 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
685 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
686 int flags, CPUWatchpoint **watchpoint)
688 return -ENOSYS;
690 #else
691 /* Add a watchpoint. */
692 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
693 int flags, CPUWatchpoint **watchpoint)
695 CPUWatchpoint *wp;
697 /* forbid ranges which are empty or run off the end of the address space */
698 if (len == 0 || (addr + len - 1) < addr) {
699 error_report("tried to set invalid watchpoint at %"
700 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
701 return -EINVAL;
703 wp = g_malloc(sizeof(*wp));
705 wp->vaddr = addr;
706 wp->len = len;
707 wp->flags = flags;
709 /* keep all GDB-injected watchpoints in front */
710 if (flags & BP_GDB) {
711 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
712 } else {
713 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
716 tlb_flush_page(cpu, addr);
718 if (watchpoint)
719 *watchpoint = wp;
720 return 0;
723 /* Remove a specific watchpoint. */
724 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
725 int flags)
727 CPUWatchpoint *wp;
729 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
730 if (addr == wp->vaddr && len == wp->len
731 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
732 cpu_watchpoint_remove_by_ref(cpu, wp);
733 return 0;
736 return -ENOENT;
739 /* Remove a specific watchpoint by reference. */
740 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
742 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
744 tlb_flush_page(cpu, watchpoint->vaddr);
746 g_free(watchpoint);
749 /* Remove all matching watchpoints. */
750 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
752 CPUWatchpoint *wp, *next;
754 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
755 if (wp->flags & mask) {
756 cpu_watchpoint_remove_by_ref(cpu, wp);
761 /* Return true if this watchpoint address matches the specified
762 * access (ie the address range covered by the watchpoint overlaps
763 * partially or completely with the address range covered by the
764 * access).
766 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
767 vaddr addr,
768 vaddr len)
770 /* We know the lengths are non-zero, but a little caution is
771 * required to avoid errors in the case where the range ends
772 * exactly at the top of the address space and so addr + len
773 * wraps round to zero.
775 vaddr wpend = wp->vaddr + wp->len - 1;
776 vaddr addrend = addr + len - 1;
778 return !(addr > wpend || wp->vaddr > addrend);
781 #endif
783 /* Add a breakpoint. */
784 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
785 CPUBreakpoint **breakpoint)
787 CPUBreakpoint *bp;
789 bp = g_malloc(sizeof(*bp));
791 bp->pc = pc;
792 bp->flags = flags;
794 /* keep all GDB-injected breakpoints in front */
795 if (flags & BP_GDB) {
796 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
797 } else {
798 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
801 breakpoint_invalidate(cpu, pc);
803 if (breakpoint) {
804 *breakpoint = bp;
806 return 0;
809 /* Remove a specific breakpoint. */
810 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
812 CPUBreakpoint *bp;
814 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
815 if (bp->pc == pc && bp->flags == flags) {
816 cpu_breakpoint_remove_by_ref(cpu, bp);
817 return 0;
820 return -ENOENT;
823 /* Remove a specific breakpoint by reference. */
824 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
826 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
828 breakpoint_invalidate(cpu, breakpoint->pc);
830 g_free(breakpoint);
833 /* Remove all matching breakpoints. */
834 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
836 CPUBreakpoint *bp, *next;
838 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
839 if (bp->flags & mask) {
840 cpu_breakpoint_remove_by_ref(cpu, bp);
845 /* enable or disable single step mode. EXCP_DEBUG is returned by the
846 CPU loop after each instruction */
847 void cpu_single_step(CPUState *cpu, int enabled)
849 if (cpu->singlestep_enabled != enabled) {
850 cpu->singlestep_enabled = enabled;
851 if (kvm_enabled()) {
852 kvm_update_guest_debug(cpu, 0);
853 } else {
854 /* must flush all the translated code to avoid inconsistencies */
855 /* XXX: only flush what is necessary */
856 tb_flush(cpu);
861 void cpu_abort(CPUState *cpu, const char *fmt, ...)
863 va_list ap;
864 va_list ap2;
866 va_start(ap, fmt);
867 va_copy(ap2, ap);
868 fprintf(stderr, "qemu: fatal: ");
869 vfprintf(stderr, fmt, ap);
870 fprintf(stderr, "\n");
871 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
872 if (qemu_log_enabled()) {
873 qemu_log("qemu: fatal: ");
874 qemu_log_vprintf(fmt, ap2);
875 qemu_log("\n");
876 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
877 qemu_log_flush();
878 qemu_log_close();
880 va_end(ap2);
881 va_end(ap);
882 #if defined(CONFIG_USER_ONLY)
884 struct sigaction act;
885 sigfillset(&act.sa_mask);
886 act.sa_handler = SIG_DFL;
887 sigaction(SIGABRT, &act, NULL);
889 #endif
890 abort();
893 #if !defined(CONFIG_USER_ONLY)
894 /* Called from RCU critical section */
895 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
897 RAMBlock *block;
899 block = atomic_rcu_read(&ram_list.mru_block);
900 if (block && addr - block->offset < block->max_length) {
901 goto found;
903 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
904 if (addr - block->offset < block->max_length) {
905 goto found;
909 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
910 abort();
912 found:
913 /* It is safe to write mru_block outside the iothread lock. This
914 * is what happens:
916 * mru_block = xxx
917 * rcu_read_unlock()
918 * xxx removed from list
919 * rcu_read_lock()
920 * read mru_block
921 * mru_block = NULL;
922 * call_rcu(reclaim_ramblock, xxx);
923 * rcu_read_unlock()
925 * atomic_rcu_set is not needed here. The block was already published
926 * when it was placed into the list. Here we're just making an extra
927 * copy of the pointer.
929 ram_list.mru_block = block;
930 return block;
933 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
935 CPUState *cpu;
936 ram_addr_t start1;
937 RAMBlock *block;
938 ram_addr_t end;
940 end = TARGET_PAGE_ALIGN(start + length);
941 start &= TARGET_PAGE_MASK;
943 rcu_read_lock();
944 block = qemu_get_ram_block(start);
945 assert(block == qemu_get_ram_block(end - 1));
946 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
947 CPU_FOREACH(cpu) {
948 tlb_reset_dirty(cpu, start1, length);
950 rcu_read_unlock();
953 /* Note: start and end must be within the same ram block. */
954 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
955 ram_addr_t length,
956 unsigned client)
958 unsigned long end, page;
959 bool dirty;
961 if (length == 0) {
962 return false;
965 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
966 page = start >> TARGET_PAGE_BITS;
967 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
968 page, end - page);
970 if (dirty && tcg_enabled()) {
971 tlb_reset_dirty_range_all(start, length);
974 return dirty;
977 /* Called from RCU critical section */
978 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
979 MemoryRegionSection *section,
980 target_ulong vaddr,
981 hwaddr paddr, hwaddr xlat,
982 int prot,
983 target_ulong *address)
985 hwaddr iotlb;
986 CPUWatchpoint *wp;
988 if (memory_region_is_ram(section->mr)) {
989 /* Normal RAM. */
990 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
991 + xlat;
992 if (!section->readonly) {
993 iotlb |= PHYS_SECTION_NOTDIRTY;
994 } else {
995 iotlb |= PHYS_SECTION_ROM;
997 } else {
998 AddressSpaceDispatch *d;
1000 d = atomic_rcu_read(&section->address_space->dispatch);
1001 iotlb = section - d->map.sections;
1002 iotlb += xlat;
1005 /* Make accesses to pages with watchpoints go via the
1006 watchpoint trap routines. */
1007 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1008 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1009 /* Avoid trapping reads of pages with a write breakpoint. */
1010 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1011 iotlb = PHYS_SECTION_WATCH + paddr;
1012 *address |= TLB_MMIO;
1013 break;
1018 return iotlb;
#endif /* !defined(CONFIG_USER_ONLY) */
1022 #if !defined(CONFIG_USER_ONLY)
1024 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1025 uint16_t section);
1026 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1028 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1029 qemu_anon_ram_alloc;
1032 * Set a custom physical guest memory alloator.
1033 * Accelerators with unusual needs may need this. Hopefully, we can
1034 * get rid of it eventually.
1036 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1038 phys_mem_alloc = alloc;
1041 static uint16_t phys_section_add(PhysPageMap *map,
1042 MemoryRegionSection *section)
1044 /* The physical section number is ORed with a page-aligned
1045 * pointer to produce the iotlb entries. Thus it should
1046 * never overflow into the page-aligned value.
1048 assert(map->sections_nb < TARGET_PAGE_SIZE);
1050 if (map->sections_nb == map->sections_nb_alloc) {
1051 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1052 map->sections = g_renew(MemoryRegionSection, map->sections,
1053 map->sections_nb_alloc);
1055 map->sections[map->sections_nb] = *section;
1056 memory_region_ref(section->mr);
1057 return map->sections_nb++;
1060 static void phys_section_destroy(MemoryRegion *mr)
1062 memory_region_unref(mr);
1064 if (mr->subpage) {
1065 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1066 object_unref(OBJECT(&subpage->iomem));
1067 g_free(subpage);
1071 static void phys_sections_free(PhysPageMap *map)
1073 while (map->sections_nb > 0) {
1074 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1075 phys_section_destroy(section->mr);
1077 g_free(map->sections);
1078 g_free(map->nodes);
1081 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1083 subpage_t *subpage;
1084 hwaddr base = section->offset_within_address_space
1085 & TARGET_PAGE_MASK;
1086 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1087 d->map.nodes, d->map.sections);
1088 MemoryRegionSection subsection = {
1089 .offset_within_address_space = base,
1090 .size = int128_make64(TARGET_PAGE_SIZE),
1092 hwaddr start, end;
1094 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1096 if (!(existing->mr->subpage)) {
1097 subpage = subpage_init(d->as, base);
1098 subsection.address_space = d->as;
1099 subsection.mr = &subpage->iomem;
1100 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1101 phys_section_add(&d->map, &subsection));
1102 } else {
1103 subpage = container_of(existing->mr, subpage_t, iomem);
1105 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1106 end = start + int128_get64(section->size) - 1;
1107 subpage_register(subpage, start, end,
1108 phys_section_add(&d->map, section));
1112 static void register_multipage(AddressSpaceDispatch *d,
1113 MemoryRegionSection *section)
1115 hwaddr start_addr = section->offset_within_address_space;
1116 uint16_t section_index = phys_section_add(&d->map, section);
1117 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1118 TARGET_PAGE_BITS));
1120 assert(num_pages);
1121 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1124 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1126 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1127 AddressSpaceDispatch *d = as->next_dispatch;
1128 MemoryRegionSection now = *section, remain = *section;
1129 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1131 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1132 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1133 - now.offset_within_address_space;
1135 now.size = int128_min(int128_make64(left), now.size);
1136 register_subpage(d, &now);
1137 } else {
1138 now.size = int128_zero();
1140 while (int128_ne(remain.size, now.size)) {
1141 remain.size = int128_sub(remain.size, now.size);
1142 remain.offset_within_address_space += int128_get64(now.size);
1143 remain.offset_within_region += int128_get64(now.size);
1144 now = remain;
1145 if (int128_lt(remain.size, page_size)) {
1146 register_subpage(d, &now);
1147 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1148 now.size = page_size;
1149 register_subpage(d, &now);
1150 } else {
1151 now.size = int128_and(now.size, int128_neg(page_size));
1152 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1163 void qemu_mutex_lock_ramlist(void)
1165 qemu_mutex_lock(&ram_list.mutex);
1168 void qemu_mutex_unlock_ramlist(void)
1170 qemu_mutex_unlock(&ram_list.mutex);
1173 #ifdef __linux__
1175 #include <sys/vfs.h>
1177 #define HUGETLBFS_MAGIC 0x958458f6
1179 static long gethugepagesize(const char *path, Error **errp)
1181 struct statfs fs;
1182 int ret;
1184 do {
1185 ret = statfs(path, &fs);
1186 } while (ret != 0 && errno == EINTR);
1188 if (ret != 0) {
1189 error_setg_errno(errp, errno, "failed to get page size of file %s",
1190 path);
1191 return 0;
1194 if (fs.f_type != HUGETLBFS_MAGIC)
1195 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1197 return fs.f_bsize;
1200 static void *file_ram_alloc(RAMBlock *block,
1201 ram_addr_t memory,
1202 const char *path,
1203 Error **errp)
1205 char *filename;
1206 char *sanitized_name;
1207 char *c;
1208 void *ptr;
1209 void *area = NULL;
1210 int fd;
1211 uint64_t hpagesize;
1212 uint64_t total;
1213 Error *local_err = NULL;
1214 size_t offset;
1216 hpagesize = gethugepagesize(path, &local_err);
1217 if (local_err) {
1218 error_propagate(errp, local_err);
1219 goto error;
1221 block->mr->align = hpagesize;
1223 if (memory < hpagesize) {
1224 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1225 "or larger than huge page size 0x%" PRIx64,
1226 memory, hpagesize);
1227 goto error;
1230 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1231 error_setg(errp,
1232 "host lacks kvm mmu notifiers, -mem-path unsupported");
1233 goto error;
1236 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1237 sanitized_name = g_strdup(memory_region_name(block->mr));
1238 for (c = sanitized_name; *c != '\0'; c++) {
1239 if (*c == '/')
1240 *c = '_';
1243 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1244 sanitized_name);
1245 g_free(sanitized_name);
1247 fd = mkstemp(filename);
1248 if (fd < 0) {
1249 error_setg_errno(errp, errno,
1250 "unable to create backing store for hugepages");
1251 g_free(filename);
1252 goto error;
1254 unlink(filename);
1255 g_free(filename);
1257 memory = ROUND_UP(memory, hpagesize);
1258 total = memory + hpagesize;
1261 * ftruncate is not supported by hugetlbfs in older
1262 * hosts, so don't bother bailing out on errors.
1263 * If anything goes wrong with it under other filesystems,
1264 * mmap will fail.
1266 if (ftruncate(fd, memory)) {
1267 perror("ftruncate");
1270 ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
1271 -1, 0);
1272 if (ptr == MAP_FAILED) {
1273 error_setg_errno(errp, errno,
1274 "unable to allocate memory range for hugepages");
1275 close(fd);
1276 goto error;
1279 offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
1281 area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
1282 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
1283 MAP_FIXED,
1284 fd, 0);
1285 if (area == MAP_FAILED) {
1286 error_setg_errno(errp, errno,
1287 "unable to map backing store for hugepages");
1288 munmap(ptr, total);
1289 close(fd);
1290 goto error;
1293 if (offset > 0) {
1294 munmap(ptr, offset);
1296 ptr += offset;
1297 total -= offset;
1299 if (total > memory + getpagesize()) {
1300 munmap(ptr + memory + getpagesize(),
1301 total - memory - getpagesize());
1304 if (mem_prealloc) {
1305 os_mem_prealloc(fd, area, memory);
1308 block->fd = fd;
1309 return area;
1311 error:
1312 if (mem_prealloc) {
1313 error_report("%s", error_get_pretty(*errp));
1314 exit(1);
1316 return NULL;
1318 #endif
1320 /* Called with the ramlist lock held. */
1321 static ram_addr_t find_ram_offset(ram_addr_t size)
1323 RAMBlock *block, *next_block;
1324 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1326 assert(size != 0); /* it would hand out same offset multiple times */
1328 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1329 return 0;
1332 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1333 ram_addr_t end, next = RAM_ADDR_MAX;
1335 end = block->offset + block->max_length;
1337 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1338 if (next_block->offset >= end) {
1339 next = MIN(next, next_block->offset);
1342 if (next - end >= size && next - end < mingap) {
1343 offset = end;
1344 mingap = next - end;
1348 if (offset == RAM_ADDR_MAX) {
1349 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1350 (uint64_t)size);
1351 abort();
1354 return offset;
1357 ram_addr_t last_ram_offset(void)
1359 RAMBlock *block;
1360 ram_addr_t last = 0;
1362 rcu_read_lock();
1363 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1364 last = MAX(last, block->offset + block->max_length);
1366 rcu_read_unlock();
1367 return last;
1370 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1372 int ret;
1374 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1375 if (!machine_dump_guest_core(current_machine)) {
1376 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1377 if (ret) {
1378 perror("qemu_madvise");
1379 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1380 "but dump_guest_core=off specified\n");
1385 /* Called within an RCU critical section, or while the ramlist lock
1386 * is held.
1388 static RAMBlock *find_ram_block(ram_addr_t addr)
1390 RAMBlock *block;
1392 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1393 if (block->offset == addr) {
1394 return block;
1398 return NULL;
1401 /* Called with iothread lock held. */
1402 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1404 RAMBlock *new_block, *block;
1406 rcu_read_lock();
1407 new_block = find_ram_block(addr);
1408 assert(new_block);
1409 assert(!new_block->idstr[0]);
1411 if (dev) {
1412 char *id = qdev_get_dev_path(dev);
1413 if (id) {
1414 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1415 g_free(id);
1418 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1420 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1421 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1422 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1423 new_block->idstr);
1424 abort();
1427 rcu_read_unlock();
1430 /* Called with iothread lock held. */
1431 void qemu_ram_unset_idstr(ram_addr_t addr)
1433 RAMBlock *block;
1435 /* FIXME: arch_init.c assumes that this is not called throughout
1436 * migration. Ignore the problem since hot-unplug during migration
1437 * does not work anyway.
1440 rcu_read_lock();
1441 block = find_ram_block(addr);
1442 if (block) {
1443 memset(block->idstr, 0, sizeof(block->idstr));
1445 rcu_read_unlock();
1448 static int memory_try_enable_merging(void *addr, size_t len)
1450 if (!machine_mem_merge(current_machine)) {
1451 /* disabled by the user */
1452 return 0;
1455 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1458 /* Only legal before guest might have detected the memory size: e.g. on
1459 * incoming migration, or right after reset.
1461 * As memory core doesn't know how is memory accessed, it is up to
1462 * resize callback to update device state and/or add assertions to detect
1463 * misuse, if necessary.
1465 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1467 RAMBlock *block = find_ram_block(base);
1469 assert(block);
1471 newsize = TARGET_PAGE_ALIGN(newsize);
1473 if (block->used_length == newsize) {
1474 return 0;
1477 if (!(block->flags & RAM_RESIZEABLE)) {
1478 error_setg_errno(errp, EINVAL,
1479 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1480 " in != 0x" RAM_ADDR_FMT, block->idstr,
1481 newsize, block->used_length);
1482 return -EINVAL;
1485 if (block->max_length < newsize) {
1486 error_setg_errno(errp, EINVAL,
1487 "Length too large: %s: 0x" RAM_ADDR_FMT
1488 " > 0x" RAM_ADDR_FMT, block->idstr,
1489 newsize, block->max_length);
1490 return -EINVAL;
1493 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1494 block->used_length = newsize;
1495 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1496 DIRTY_CLIENTS_ALL);
1497 memory_region_set_size(block->mr, newsize);
1498 if (block->resized) {
1499 block->resized(block->idstr, newsize, block->host);
1501 return 0;
1504 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1506 RAMBlock *block;
1507 RAMBlock *last_block = NULL;
1508 ram_addr_t old_ram_size, new_ram_size;
1510 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1512 qemu_mutex_lock_ramlist();
1513 new_block->offset = find_ram_offset(new_block->max_length);
1515 if (!new_block->host) {
1516 if (xen_enabled()) {
1517 xen_ram_alloc(new_block->offset, new_block->max_length,
1518 new_block->mr);
1519 } else {
1520 new_block->host = phys_mem_alloc(new_block->max_length,
1521 &new_block->mr->align);
1522 if (!new_block->host) {
1523 error_setg_errno(errp, errno,
1524 "cannot set up guest memory '%s'",
1525 memory_region_name(new_block->mr));
1526 qemu_mutex_unlock_ramlist();
1527 return -1;
1529 memory_try_enable_merging(new_block->host, new_block->max_length);
1533 new_ram_size = MAX(old_ram_size,
1534 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1535 if (new_ram_size > old_ram_size) {
1536 migration_bitmap_extend(old_ram_size, new_ram_size);
1538 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1539 * QLIST (which has an RCU-friendly variant) does not have insertion at
1540 * tail, so save the last element in last_block.
1542 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1543 last_block = block;
1544 if (block->max_length < new_block->max_length) {
1545 break;
1548 if (block) {
1549 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1550 } else if (last_block) {
1551 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1552 } else { /* list is empty */
1553 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1555 ram_list.mru_block = NULL;
1557 /* Write list before version */
1558 smp_wmb();
1559 ram_list.version++;
1560 qemu_mutex_unlock_ramlist();
1562 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1564 if (new_ram_size > old_ram_size) {
1565 int i;
1567 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1568 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1569 ram_list.dirty_memory[i] =
1570 bitmap_zero_extend(ram_list.dirty_memory[i],
1571 old_ram_size, new_ram_size);
1574 cpu_physical_memory_set_dirty_range(new_block->offset,
1575 new_block->used_length,
1576 DIRTY_CLIENTS_ALL);
1578 if (new_block->host) {
1579 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1580 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1581 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1582 if (kvm_enabled()) {
1583 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1587 return new_block->offset;
1590 #ifdef __linux__
1591 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1592 bool share, const char *mem_path,
1593 Error **errp)
1595 RAMBlock *new_block;
1596 ram_addr_t addr;
1597 Error *local_err = NULL;
1599 if (xen_enabled()) {
1600 error_setg(errp, "-mem-path not supported with Xen");
1601 return -1;
1604 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1606 * file_ram_alloc() needs to allocate just like
1607 * phys_mem_alloc, but we haven't bothered to provide
1608 * a hook there.
1610 error_setg(errp,
1611 "-mem-path not supported with this accelerator");
1612 return -1;
1615 size = TARGET_PAGE_ALIGN(size);
1616 new_block = g_malloc0(sizeof(*new_block));
1617 new_block->mr = mr;
1618 new_block->used_length = size;
1619 new_block->max_length = size;
1620 new_block->flags = share ? RAM_SHARED : 0;
1621 new_block->flags |= RAM_EXTRA;
1622 new_block->host = file_ram_alloc(new_block, size,
1623 mem_path, errp);
1624 if (!new_block->host) {
1625 g_free(new_block);
1626 return -1;
1629 addr = ram_block_add(new_block, &local_err);
1630 if (local_err) {
1631 g_free(new_block);
1632 error_propagate(errp, local_err);
1633 return -1;
1635 return addr;
1637 #endif
1639 static
1640 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1641 void (*resized)(const char*,
1642 uint64_t length,
1643 void *host),
1644 void *host, bool resizeable,
1645 MemoryRegion *mr, Error **errp)
1647 RAMBlock *new_block;
1648 ram_addr_t addr;
1649 Error *local_err = NULL;
1651 size = TARGET_PAGE_ALIGN(size);
1652 max_size = TARGET_PAGE_ALIGN(max_size);
1653 new_block = g_malloc0(sizeof(*new_block));
1654 new_block->mr = mr;
1655 new_block->resized = resized;
1656 new_block->used_length = size;
1657 new_block->max_length = max_size;
1658 assert(max_size >= size);
1659 new_block->fd = -1;
1660 new_block->host = host;
1661 if (host) {
1662 new_block->flags |= RAM_PREALLOC;
1664 if (resizeable) {
1665 new_block->flags |= RAM_RESIZEABLE;
1667 addr = ram_block_add(new_block, &local_err);
1668 if (local_err) {
1669 g_free(new_block);
1670 error_propagate(errp, local_err);
1671 return -1;
1673 return addr;
1676 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1677 MemoryRegion *mr, Error **errp)
1679 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1682 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1684 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1687 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1688 void (*resized)(const char*,
1689 uint64_t length,
1690 void *host),
1691 MemoryRegion *mr, Error **errp)
1693 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1696 void qemu_ram_free_from_ptr(ram_addr_t addr)
1698 RAMBlock *block;
1700 qemu_mutex_lock_ramlist();
1701 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1702 if (addr == block->offset) {
1703 QLIST_REMOVE_RCU(block, next);
1704 ram_list.mru_block = NULL;
1705 /* Write list before version */
1706 smp_wmb();
1707 ram_list.version++;
1708 g_free_rcu(block, rcu);
1709 break;
1712 qemu_mutex_unlock_ramlist();
1715 static void reclaim_ramblock(RAMBlock *block)
1717 if (block->flags & RAM_PREALLOC) {
1719 } else if (xen_enabled()) {
1720 xen_invalidate_map_cache_entry(block->host);
1721 #ifndef _WIN32
1722 } else if (block->fd >= 0) {
1723 if (block->flags & RAM_EXTRA) {
1724 munmap(block->host, block->max_length + getpagesize());
1725 } else {
1726 munmap(block->host, block->max_length);
1728 close(block->fd);
1729 #endif
1730 } else {
1731 qemu_anon_ram_free(block->host, block->max_length);
1733 g_free(block);
1736 void qemu_ram_free(ram_addr_t addr)
1738 RAMBlock *block;
1740 qemu_mutex_lock_ramlist();
1741 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1742 if (addr == block->offset) {
1743 QLIST_REMOVE_RCU(block, next);
1744 ram_list.mru_block = NULL;
1745 /* Write list before version */
1746 smp_wmb();
1747 ram_list.version++;
1748 call_rcu(block, reclaim_ramblock, rcu);
1749 break;
1752 qemu_mutex_unlock_ramlist();
1755 #ifndef _WIN32
1756 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1758 RAMBlock *block;
1759 ram_addr_t offset;
1760 int flags;
1761 void *area, *vaddr;
1763 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1764 offset = addr - block->offset;
1765 if (offset < block->max_length) {
1766 vaddr = ramblock_ptr(block, offset);
1767 if (block->flags & RAM_PREALLOC) {
1769 } else if (xen_enabled()) {
1770 abort();
1771 } else {
1772 flags = MAP_FIXED;
1773 if (block->fd >= 0) {
1774 flags |= (block->flags & RAM_SHARED ?
1775 MAP_SHARED : MAP_PRIVATE);
1776 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1777 flags, block->fd, offset);
1778 } else {
1780 * Remap needs to match alloc. Accelerators that
1781 * set phys_mem_alloc never remap. If they did,
1782 * we'd need a remap hook here.
1784 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1786 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1787 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1788 flags, -1, 0);
1790 if (area != vaddr) {
1791 fprintf(stderr, "Could not remap addr: "
1792 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1793 length, addr);
1794 exit(1);
1796 memory_try_enable_merging(vaddr, length);
1797 qemu_ram_setup_dump(vaddr, length);
1802 #endif /* !_WIN32 */
1804 int qemu_get_ram_fd(ram_addr_t addr)
1806 RAMBlock *block;
1807 int fd;
1809 rcu_read_lock();
1810 block = qemu_get_ram_block(addr);
1811 fd = block->fd;
1812 rcu_read_unlock();
1813 return fd;
1816 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1818 RAMBlock *block;
1819 void *ptr;
1821 rcu_read_lock();
1822 block = qemu_get_ram_block(addr);
1823 ptr = ramblock_ptr(block, 0);
1824 rcu_read_unlock();
1825 return ptr;
1828 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1829 * This should not be used for general purpose DMA. Use address_space_map
1830 * or address_space_rw instead. For local memory (e.g. video ram) that the
1831 * device owns, use memory_region_get_ram_ptr.
1833 * By the time this function returns, the returned pointer is not protected
1834 * by RCU anymore. If the caller is not within an RCU critical section and
1835 * does not hold the iothread lock, it must have other means of protecting the
1836 * pointer, such as a reference to the region that includes the incoming
1837 * ram_addr_t.
1839 void *qemu_get_ram_ptr(ram_addr_t addr)
1841 RAMBlock *block;
1842 void *ptr;
1844 rcu_read_lock();
1845 block = qemu_get_ram_block(addr);
1847 if (xen_enabled() && block->host == NULL) {
1848 /* We need to check if the requested address is in the RAM
1849 * because we don't want to map the entire memory in QEMU.
1850 * In that case just map until the end of the page.
1852 if (block->offset == 0) {
1853 ptr = xen_map_cache(addr, 0, 0);
1854 goto unlock;
1857 block->host = xen_map_cache(block->offset, block->max_length, 1);
1859 ptr = ramblock_ptr(block, addr - block->offset);
1861 unlock:
1862 rcu_read_unlock();
1863 return ptr;
1866 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1867 * but takes a size argument.
1869 * By the time this function returns, the returned pointer is not protected
1870 * by RCU anymore. If the caller is not within an RCU critical section and
1871 * does not hold the iothread lock, it must have other means of protecting the
1872 * pointer, such as a reference to the region that includes the incoming
1873 * ram_addr_t.
1875 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1877 void *ptr;
1878 if (*size == 0) {
1879 return NULL;
1881 if (xen_enabled()) {
1882 return xen_map_cache(addr, *size, 1);
1883 } else {
1884 RAMBlock *block;
1885 rcu_read_lock();
1886 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1887 if (addr - block->offset < block->max_length) {
1888 if (addr - block->offset + *size > block->max_length)
1889 *size = block->max_length - addr + block->offset;
1890 ptr = ramblock_ptr(block, addr - block->offset);
1891 rcu_read_unlock();
1892 return ptr;
1896 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1897 abort();
1901 /* Some of the softmmu routines need to translate from a host pointer
1902 * (typically a TLB entry) back to a ram offset.
1904 * By the time this function returns, the returned pointer is not protected
1905 * by RCU anymore. If the caller is not within an RCU critical section and
1906 * does not hold the iothread lock, it must have other means of protecting the
1907 * pointer, such as a reference to the region that includes the incoming
1908 * ram_addr_t.
1910 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1912 RAMBlock *block;
1913 uint8_t *host = ptr;
1914 MemoryRegion *mr;
1916 if (xen_enabled()) {
1917 rcu_read_lock();
1918 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1919 mr = qemu_get_ram_block(*ram_addr)->mr;
1920 rcu_read_unlock();
1921 return mr;
1924 rcu_read_lock();
1925 block = atomic_rcu_read(&ram_list.mru_block);
1926 if (block && block->host && host - block->host < block->max_length) {
1927 goto found;
1930 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1931 /* This case append when the block is not mapped. */
1932 if (block->host == NULL) {
1933 continue;
1935 if (host - block->host < block->max_length) {
1936 goto found;
1940 rcu_read_unlock();
1941 return NULL;
1943 found:
1944 *ram_addr = block->offset + (host - block->host);
1945 mr = block->mr;
1946 rcu_read_unlock();
1947 return mr;
1950 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1951 uint64_t val, unsigned size)
1953 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1954 tb_invalidate_phys_page_fast(ram_addr, size);
1956 switch (size) {
1957 case 1:
1958 stb_p(qemu_get_ram_ptr(ram_addr), val);
1959 break;
1960 case 2:
1961 stw_p(qemu_get_ram_ptr(ram_addr), val);
1962 break;
1963 case 4:
1964 stl_p(qemu_get_ram_ptr(ram_addr), val);
1965 break;
1966 default:
1967 abort();
1969 /* Set both VGA and migration bits for simplicity and to remove
1970 * the notdirty callback faster.
1972 cpu_physical_memory_set_dirty_range(ram_addr, size,
1973 DIRTY_CLIENTS_NOCODE);
1974 /* we remove the notdirty callback only if the code has been
1975 flushed */
1976 if (!cpu_physical_memory_is_clean(ram_addr)) {
1977 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
1981 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1982 unsigned size, bool is_write)
1984 return is_write;
1987 static const MemoryRegionOps notdirty_mem_ops = {
1988 .write = notdirty_mem_write,
1989 .valid.accepts = notdirty_mem_accepts,
1990 .endianness = DEVICE_NATIVE_ENDIAN,
1993 /* Generate a debug exception if a watchpoint has been hit. */
1994 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1996 CPUState *cpu = current_cpu;
1997 CPUArchState *env = cpu->env_ptr;
1998 target_ulong pc, cs_base;
1999 target_ulong vaddr;
2000 CPUWatchpoint *wp;
2001 int cpu_flags;
2003 if (cpu->watchpoint_hit) {
2004 /* We re-entered the check after replacing the TB. Now raise
2005 * the debug interrupt so that is will trigger after the
2006 * current instruction. */
2007 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2008 return;
2010 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2011 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2012 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2013 && (wp->flags & flags)) {
2014 if (flags == BP_MEM_READ) {
2015 wp->flags |= BP_WATCHPOINT_HIT_READ;
2016 } else {
2017 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2019 wp->hitaddr = vaddr;
2020 wp->hitattrs = attrs;
2021 if (!cpu->watchpoint_hit) {
2022 cpu->watchpoint_hit = wp;
2023 tb_check_watchpoint(cpu);
2024 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2025 cpu->exception_index = EXCP_DEBUG;
2026 cpu_loop_exit(cpu);
2027 } else {
2028 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2029 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2030 cpu_resume_from_signal(cpu, NULL);
2033 } else {
2034 wp->flags &= ~BP_WATCHPOINT_HIT;
2039 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2040 so these check for a hit then pass through to the normal out-of-line
2041 phys routines. */
2042 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2043 unsigned size, MemTxAttrs attrs)
2045 MemTxResult res;
2046 uint64_t data;
2048 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2049 switch (size) {
2050 case 1:
2051 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2052 break;
2053 case 2:
2054 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2055 break;
2056 case 4:
2057 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2058 break;
2059 default: abort();
2061 *pdata = data;
2062 return res;
2065 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2066 uint64_t val, unsigned size,
2067 MemTxAttrs attrs)
2069 MemTxResult res;
2071 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2072 switch (size) {
2073 case 1:
2074 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2075 break;
2076 case 2:
2077 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2078 break;
2079 case 4:
2080 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2081 break;
2082 default: abort();
2084 return res;
2087 static const MemoryRegionOps watch_mem_ops = {
2088 .read_with_attrs = watch_mem_read,
2089 .write_with_attrs = watch_mem_write,
2090 .endianness = DEVICE_NATIVE_ENDIAN,
2093 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2094 unsigned len, MemTxAttrs attrs)
2096 subpage_t *subpage = opaque;
2097 uint8_t buf[8];
2098 MemTxResult res;
2100 #if defined(DEBUG_SUBPAGE)
2101 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2102 subpage, len, addr);
2103 #endif
2104 res = address_space_read(subpage->as, addr + subpage->base,
2105 attrs, buf, len);
2106 if (res) {
2107 return res;
2109 switch (len) {
2110 case 1:
2111 *data = ldub_p(buf);
2112 return MEMTX_OK;
2113 case 2:
2114 *data = lduw_p(buf);
2115 return MEMTX_OK;
2116 case 4:
2117 *data = ldl_p(buf);
2118 return MEMTX_OK;
2119 case 8:
2120 *data = ldq_p(buf);
2121 return MEMTX_OK;
2122 default:
2123 abort();
2127 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2128 uint64_t value, unsigned len, MemTxAttrs attrs)
2130 subpage_t *subpage = opaque;
2131 uint8_t buf[8];
2133 #if defined(DEBUG_SUBPAGE)
2134 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2135 " value %"PRIx64"\n",
2136 __func__, subpage, len, addr, value);
2137 #endif
2138 switch (len) {
2139 case 1:
2140 stb_p(buf, value);
2141 break;
2142 case 2:
2143 stw_p(buf, value);
2144 break;
2145 case 4:
2146 stl_p(buf, value);
2147 break;
2148 case 8:
2149 stq_p(buf, value);
2150 break;
2151 default:
2152 abort();
2154 return address_space_write(subpage->as, addr + subpage->base,
2155 attrs, buf, len);
2158 static bool subpage_accepts(void *opaque, hwaddr addr,
2159 unsigned len, bool is_write)
2161 subpage_t *subpage = opaque;
2162 #if defined(DEBUG_SUBPAGE)
2163 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2164 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2165 #endif
2167 return address_space_access_valid(subpage->as, addr + subpage->base,
2168 len, is_write);
2171 static const MemoryRegionOps subpage_ops = {
2172 .read_with_attrs = subpage_read,
2173 .write_with_attrs = subpage_write,
2174 .impl.min_access_size = 1,
2175 .impl.max_access_size = 8,
2176 .valid.min_access_size = 1,
2177 .valid.max_access_size = 8,
2178 .valid.accepts = subpage_accepts,
2179 .endianness = DEVICE_NATIVE_ENDIAN,
2182 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2183 uint16_t section)
2185 int idx, eidx;
2187 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2188 return -1;
2189 idx = SUBPAGE_IDX(start);
2190 eidx = SUBPAGE_IDX(end);
2191 #if defined(DEBUG_SUBPAGE)
2192 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2193 __func__, mmio, start, end, idx, eidx, section);
2194 #endif
2195 for (; idx <= eidx; idx++) {
2196 mmio->sub_section[idx] = section;
2199 return 0;
2202 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2204 subpage_t *mmio;
2206 mmio = g_malloc0(sizeof(subpage_t));
2208 mmio->as = as;
2209 mmio->base = base;
2210 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2211 NULL, TARGET_PAGE_SIZE);
2212 mmio->iomem.subpage = true;
2213 #if defined(DEBUG_SUBPAGE)
2214 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2215 mmio, base, TARGET_PAGE_SIZE);
2216 #endif
2217 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2219 return mmio;
2222 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2223 MemoryRegion *mr)
2225 assert(as);
2226 MemoryRegionSection section = {
2227 .address_space = as,
2228 .mr = mr,
2229 .offset_within_address_space = 0,
2230 .offset_within_region = 0,
2231 .size = int128_2_64(),
2234 return phys_section_add(map, &section);
2237 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2239 CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2240 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2241 MemoryRegionSection *sections = d->map.sections;
2243 return sections[index & ~TARGET_PAGE_MASK].mr;
2246 static void io_mem_init(void)
2248 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2249 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2250 NULL, UINT64_MAX);
2251 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2252 NULL, UINT64_MAX);
2253 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2254 NULL, UINT64_MAX);
2257 static void mem_begin(MemoryListener *listener)
2259 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2260 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2261 uint16_t n;
2263 n = dummy_section(&d->map, as, &io_mem_unassigned);
2264 assert(n == PHYS_SECTION_UNASSIGNED);
2265 n = dummy_section(&d->map, as, &io_mem_notdirty);
2266 assert(n == PHYS_SECTION_NOTDIRTY);
2267 n = dummy_section(&d->map, as, &io_mem_rom);
2268 assert(n == PHYS_SECTION_ROM);
2269 n = dummy_section(&d->map, as, &io_mem_watch);
2270 assert(n == PHYS_SECTION_WATCH);
2272 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2273 d->as = as;
2274 as->next_dispatch = d;
2277 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2279 phys_sections_free(&d->map);
2280 g_free(d);
2283 static void mem_commit(MemoryListener *listener)
2285 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2286 AddressSpaceDispatch *cur = as->dispatch;
2287 AddressSpaceDispatch *next = as->next_dispatch;
2289 phys_page_compact_all(next, next->map.nodes_nb);
2291 atomic_rcu_set(&as->dispatch, next);
2292 if (cur) {
2293 call_rcu(cur, address_space_dispatch_free, rcu);
2297 static void tcg_commit(MemoryListener *listener)
2299 CPUAddressSpace *cpuas;
2300 AddressSpaceDispatch *d;
2302 /* since each CPU stores ram addresses in its TLB cache, we must
2303 reset the modified entries */
2304 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2305 cpu_reloading_memory_map();
2306 /* The CPU and TLB are protected by the iothread lock.
2307 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2308 * may have split the RCU critical section.
2310 d = atomic_rcu_read(&cpuas->as->dispatch);
2311 cpuas->memory_dispatch = d;
2312 tlb_flush(cpuas->cpu, 1);
2315 void address_space_init_dispatch(AddressSpace *as)
2317 as->dispatch = NULL;
2318 as->dispatch_listener = (MemoryListener) {
2319 .begin = mem_begin,
2320 .commit = mem_commit,
2321 .region_add = mem_add,
2322 .region_nop = mem_add,
2323 .priority = 0,
2325 memory_listener_register(&as->dispatch_listener, as);
2328 void address_space_unregister(AddressSpace *as)
2330 memory_listener_unregister(&as->dispatch_listener);
2333 void address_space_destroy_dispatch(AddressSpace *as)
2335 AddressSpaceDispatch *d = as->dispatch;
2337 atomic_rcu_set(&as->dispatch, NULL);
2338 if (d) {
2339 call_rcu(d, address_space_dispatch_free, rcu);
2343 static void memory_map_init(void)
2345 system_memory = g_malloc(sizeof(*system_memory));
2347 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2348 address_space_init(&address_space_memory, system_memory, "memory");
2350 system_io = g_malloc(sizeof(*system_io));
2351 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2352 65536);
2353 address_space_init(&address_space_io, system_io, "I/O");
2356 MemoryRegion *get_system_memory(void)
2358 return system_memory;
2361 MemoryRegion *get_system_io(void)
2363 return system_io;
2366 #endif /* !defined(CONFIG_USER_ONLY) */
2368 /* physical memory access (slow version, mainly for debug) */
2369 #if defined(CONFIG_USER_ONLY)
2370 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2371 uint8_t *buf, int len, int is_write)
2373 int l, flags;
2374 target_ulong page;
2375 void * p;
2377 while (len > 0) {
2378 page = addr & TARGET_PAGE_MASK;
2379 l = (page + TARGET_PAGE_SIZE) - addr;
2380 if (l > len)
2381 l = len;
2382 flags = page_get_flags(page);
2383 if (!(flags & PAGE_VALID))
2384 return -1;
2385 if (is_write) {
2386 if (!(flags & PAGE_WRITE))
2387 return -1;
2388 /* XXX: this code should not depend on lock_user */
2389 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2390 return -1;
2391 memcpy(p, buf, l);
2392 unlock_user(p, addr, l);
2393 } else {
2394 if (!(flags & PAGE_READ))
2395 return -1;
2396 /* XXX: this code should not depend on lock_user */
2397 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2398 return -1;
2399 memcpy(buf, p, l);
2400 unlock_user(p, addr, 0);
2402 len -= l;
2403 buf += l;
2404 addr += l;
2406 return 0;
2409 #else
2411 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2412 hwaddr length)
2414 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2415 /* No early return if dirty_log_mask is or becomes 0, because
2416 * cpu_physical_memory_set_dirty_range will still call
2417 * xen_modified_memory.
2419 if (dirty_log_mask) {
2420 dirty_log_mask =
2421 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2423 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2424 tb_invalidate_phys_range(addr, addr + length);
2425 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2427 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2430 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2432 unsigned access_size_max = mr->ops->valid.max_access_size;
2434 /* Regions are assumed to support 1-4 byte accesses unless
2435 otherwise specified. */
2436 if (access_size_max == 0) {
2437 access_size_max = 4;
2440 /* Bound the maximum access by the alignment of the address. */
2441 if (!mr->ops->impl.unaligned) {
2442 unsigned align_size_max = addr & -addr;
2443 if (align_size_max != 0 && align_size_max < access_size_max) {
2444 access_size_max = align_size_max;
2448 /* Don't attempt accesses larger than the maximum. */
2449 if (l > access_size_max) {
2450 l = access_size_max;
2452 l = pow2floor(l);
2454 return l;
2457 static bool prepare_mmio_access(MemoryRegion *mr)
2459 bool unlocked = !qemu_mutex_iothread_locked();
2460 bool release_lock = false;
2462 if (unlocked && mr->global_locking) {
2463 qemu_mutex_lock_iothread();
2464 unlocked = false;
2465 release_lock = true;
2467 if (mr->flush_coalesced_mmio) {
2468 if (unlocked) {
2469 qemu_mutex_lock_iothread();
2471 qemu_flush_coalesced_mmio_buffer();
2472 if (unlocked) {
2473 qemu_mutex_unlock_iothread();
2477 return release_lock;
2480 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2481 uint8_t *buf, int len, bool is_write)
2483 hwaddr l;
2484 uint8_t *ptr;
2485 uint64_t val;
2486 hwaddr addr1;
2487 MemoryRegion *mr;
2488 MemTxResult result = MEMTX_OK;
2489 bool release_lock = false;
2491 rcu_read_lock();
2492 while (len > 0) {
2493 l = len;
2494 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2496 if (is_write) {
2497 if (!memory_access_is_direct(mr, is_write)) {
2498 release_lock |= prepare_mmio_access(mr);
2499 l = memory_access_size(mr, l, addr1);
2500 /* XXX: could force current_cpu to NULL to avoid
2501 potential bugs */
2502 switch (l) {
2503 case 8:
2504 /* 64 bit write access */
2505 val = ldq_p(buf);
2506 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2507 attrs);
2508 break;
2509 case 4:
2510 /* 32 bit write access */
2511 val = ldl_p(buf);
2512 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2513 attrs);
2514 break;
2515 case 2:
2516 /* 16 bit write access */
2517 val = lduw_p(buf);
2518 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2519 attrs);
2520 break;
2521 case 1:
2522 /* 8 bit write access */
2523 val = ldub_p(buf);
2524 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2525 attrs);
2526 break;
2527 default:
2528 abort();
2530 } else {
2531 addr1 += memory_region_get_ram_addr(mr);
2532 /* RAM case */
2533 ptr = qemu_get_ram_ptr(addr1);
2534 memcpy(ptr, buf, l);
2535 invalidate_and_set_dirty(mr, addr1, l);
2537 } else {
2538 if (!memory_access_is_direct(mr, is_write)) {
2539 /* I/O case */
2540 release_lock |= prepare_mmio_access(mr);
2541 l = memory_access_size(mr, l, addr1);
2542 switch (l) {
2543 case 8:
2544 /* 64 bit read access */
2545 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2546 attrs);
2547 stq_p(buf, val);
2548 break;
2549 case 4:
2550 /* 32 bit read access */
2551 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2552 attrs);
2553 stl_p(buf, val);
2554 break;
2555 case 2:
2556 /* 16 bit read access */
2557 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2558 attrs);
2559 stw_p(buf, val);
2560 break;
2561 case 1:
2562 /* 8 bit read access */
2563 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2564 attrs);
2565 stb_p(buf, val);
2566 break;
2567 default:
2568 abort();
2570 } else {
2571 /* RAM case */
2572 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2573 memcpy(buf, ptr, l);
2577 if (release_lock) {
2578 qemu_mutex_unlock_iothread();
2579 release_lock = false;
2582 len -= l;
2583 buf += l;
2584 addr += l;
2586 rcu_read_unlock();
2588 return result;
2591 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2592 const uint8_t *buf, int len)
2594 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2597 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2598 uint8_t *buf, int len)
2600 return address_space_rw(as, addr, attrs, buf, len, false);
2604 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2605 int len, int is_write)
2607 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2608 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host icache over the range */
};
2616 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2617 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2619 hwaddr l;
2620 uint8_t *ptr;
2621 hwaddr addr1;
2622 MemoryRegion *mr;
2624 rcu_read_lock();
2625 while (len > 0) {
2626 l = len;
2627 mr = address_space_translate(as, addr, &addr1, &l, true);
2629 if (!(memory_region_is_ram(mr) ||
2630 memory_region_is_romd(mr))) {
2631 l = memory_access_size(mr, l, addr1);
2632 } else {
2633 addr1 += memory_region_get_ram_addr(mr);
2634 /* ROM/RAM case */
2635 ptr = qemu_get_ram_ptr(addr1);
2636 switch (type) {
2637 case WRITE_DATA:
2638 memcpy(ptr, buf, l);
2639 invalidate_and_set_dirty(mr, addr1, l);
2640 break;
2641 case FLUSH_CACHE:
2642 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2643 break;
2646 len -= l;
2647 buf += l;
2648 addr += l;
2650 rcu_read_unlock();
2653 /* used for ROM loading : can write in RAM and ROM */
2654 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2655 const uint8_t *buf, int len)
2657 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2660 void cpu_flush_icache_range(hwaddr start, int len)
2663 * This function should do the same thing as an icache flush that was
2664 * triggered from within the guest. For TCG we are always cache coherent,
2665 * so there is no need to flush anything. For KVM / Xen we need to flush
2666 * the host's instruction cache at least.
2668 if (tcg_enabled()) {
2669 return;
2672 cpu_physical_memory_write_rom_internal(&address_space_memory,
2673 start, NULL, len, FLUSH_CACHE);
2676 typedef struct {
2677 MemoryRegion *mr;
2678 void *buffer;
2679 hwaddr addr;
2680 hwaddr len;
2681 bool in_use;
2682 } BounceBuffer;
2684 static BounceBuffer bounce;
2686 typedef struct MapClient {
2687 QEMUBH *bh;
2688 QLIST_ENTRY(MapClient) link;
2689 } MapClient;
2691 QemuMutex map_client_list_lock;
2692 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2693 = QLIST_HEAD_INITIALIZER(map_client_list);
2695 static void cpu_unregister_map_client_do(MapClient *client)
2697 QLIST_REMOVE(client, link);
2698 g_free(client);
2701 static void cpu_notify_map_clients_locked(void)
2703 MapClient *client;
2705 while (!QLIST_EMPTY(&map_client_list)) {
2706 client = QLIST_FIRST(&map_client_list);
2707 qemu_bh_schedule(client->bh);
2708 cpu_unregister_map_client_do(client);
2712 void cpu_register_map_client(QEMUBH *bh)
2714 MapClient *client = g_malloc(sizeof(*client));
2716 qemu_mutex_lock(&map_client_list_lock);
2717 client->bh = bh;
2718 QLIST_INSERT_HEAD(&map_client_list, client, link);
2719 if (!atomic_read(&bounce.in_use)) {
2720 cpu_notify_map_clients_locked();
2722 qemu_mutex_unlock(&map_client_list_lock);
2725 void cpu_exec_init_all(void)
2727 qemu_mutex_init(&ram_list.mutex);
2728 memory_map_init();
2729 io_mem_init();
2730 qemu_mutex_init(&map_client_list_lock);
2733 void cpu_unregister_map_client(QEMUBH *bh)
2735 MapClient *client;
2737 qemu_mutex_lock(&map_client_list_lock);
2738 QLIST_FOREACH(client, &map_client_list, link) {
2739 if (client->bh == bh) {
2740 cpu_unregister_map_client_do(client);
2741 break;
2744 qemu_mutex_unlock(&map_client_list_lock);
2747 static void cpu_notify_map_clients(void)
2749 qemu_mutex_lock(&map_client_list_lock);
2750 cpu_notify_map_clients_locked();
2751 qemu_mutex_unlock(&map_client_list_lock);
2754 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2756 MemoryRegion *mr;
2757 hwaddr l, xlat;
2759 rcu_read_lock();
2760 while (len > 0) {
2761 l = len;
2762 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2763 if (!memory_access_is_direct(mr, is_write)) {
2764 l = memory_access_size(mr, l, addr);
2765 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2766 return false;
2770 len -= l;
2771 addr += l;
2773 rcu_read_unlock();
2774 return true;
2777 /* Map a physical memory region into a host virtual address.
2778 * May map a subset of the requested range, given by and returned in *plen.
2779 * May return NULL if resources needed to perform the mapping are exhausted.
2780 * Use only for reads OR writes - not for read-modify-write operations.
2781 * Use cpu_register_map_client() to know when retrying the map operation is
2782 * likely to succeed.
2784 void *address_space_map(AddressSpace *as,
2785 hwaddr addr,
2786 hwaddr *plen,
2787 bool is_write)
2789 hwaddr len = *plen;
2790 hwaddr done = 0;
2791 hwaddr l, xlat, base;
2792 MemoryRegion *mr, *this_mr;
2793 ram_addr_t raddr;
2795 if (len == 0) {
2796 return NULL;
2799 l = len;
2800 rcu_read_lock();
2801 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2803 if (!memory_access_is_direct(mr, is_write)) {
2804 if (atomic_xchg(&bounce.in_use, true)) {
2805 rcu_read_unlock();
2806 return NULL;
2808 /* Avoid unbounded allocations */
2809 l = MIN(l, TARGET_PAGE_SIZE);
2810 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2811 bounce.addr = addr;
2812 bounce.len = l;
2814 memory_region_ref(mr);
2815 bounce.mr = mr;
2816 if (!is_write) {
2817 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2818 bounce.buffer, l);
2821 rcu_read_unlock();
2822 *plen = l;
2823 return bounce.buffer;
2826 base = xlat;
2827 raddr = memory_region_get_ram_addr(mr);
2829 for (;;) {
2830 len -= l;
2831 addr += l;
2832 done += l;
2833 if (len == 0) {
2834 break;
2837 l = len;
2838 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2839 if (this_mr != mr || xlat != base + done) {
2840 break;
2844 memory_region_ref(mr);
2845 rcu_read_unlock();
2846 *plen = done;
2847 return qemu_ram_ptr_length(raddr + base, plen);
2850 /* Unmaps a memory region previously mapped by address_space_map().
2851 * Will also mark the memory as dirty if is_write == 1. access_len gives
2852 * the amount of memory that was actually read or written by the caller.
2854 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2855 int is_write, hwaddr access_len)
2857 if (buffer != bounce.buffer) {
2858 MemoryRegion *mr;
2859 ram_addr_t addr1;
2861 mr = qemu_ram_addr_from_host(buffer, &addr1);
2862 assert(mr != NULL);
2863 if (is_write) {
2864 invalidate_and_set_dirty(mr, addr1, access_len);
2866 if (xen_enabled()) {
2867 xen_invalidate_map_cache_entry(buffer);
2869 memory_region_unref(mr);
2870 return;
2872 if (is_write) {
2873 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2874 bounce.buffer, access_len);
2876 qemu_vfree(bounce.buffer);
2877 bounce.buffer = NULL;
2878 memory_region_unref(bounce.mr);
2879 atomic_mb_set(&bounce.in_use, false);
2880 cpu_notify_map_clients();
2883 void *cpu_physical_memory_map(hwaddr addr,
2884 hwaddr *plen,
2885 int is_write)
2887 return address_space_map(&address_space_memory, addr, plen, is_write);
2890 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2891 int is_write, hwaddr access_len)
2893 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2896 /* warning: addr must be aligned */
2897 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2898 MemTxAttrs attrs,
2899 MemTxResult *result,
2900 enum device_endian endian)
2902 uint8_t *ptr;
2903 uint64_t val;
2904 MemoryRegion *mr;
2905 hwaddr l = 4;
2906 hwaddr addr1;
2907 MemTxResult r;
2908 bool release_lock = false;
2910 rcu_read_lock();
2911 mr = address_space_translate(as, addr, &addr1, &l, false);
2912 if (l < 4 || !memory_access_is_direct(mr, false)) {
2913 release_lock |= prepare_mmio_access(mr);
2915 /* I/O case */
2916 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2917 #if defined(TARGET_WORDS_BIGENDIAN)
2918 if (endian == DEVICE_LITTLE_ENDIAN) {
2919 val = bswap32(val);
2921 #else
2922 if (endian == DEVICE_BIG_ENDIAN) {
2923 val = bswap32(val);
2925 #endif
2926 } else {
2927 /* RAM case */
2928 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2929 & TARGET_PAGE_MASK)
2930 + addr1);
2931 switch (endian) {
2932 case DEVICE_LITTLE_ENDIAN:
2933 val = ldl_le_p(ptr);
2934 break;
2935 case DEVICE_BIG_ENDIAN:
2936 val = ldl_be_p(ptr);
2937 break;
2938 default:
2939 val = ldl_p(ptr);
2940 break;
2942 r = MEMTX_OK;
2944 if (result) {
2945 *result = r;
2947 if (release_lock) {
2948 qemu_mutex_unlock_iothread();
2950 rcu_read_unlock();
2951 return val;
2954 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2955 MemTxAttrs attrs, MemTxResult *result)
2957 return address_space_ldl_internal(as, addr, attrs, result,
2958 DEVICE_NATIVE_ENDIAN);
2961 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2962 MemTxAttrs attrs, MemTxResult *result)
2964 return address_space_ldl_internal(as, addr, attrs, result,
2965 DEVICE_LITTLE_ENDIAN);
2968 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2969 MemTxAttrs attrs, MemTxResult *result)
2971 return address_space_ldl_internal(as, addr, attrs, result,
2972 DEVICE_BIG_ENDIAN);
2975 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2977 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2980 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2982 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2985 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2987 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2990 /* warning: addr must be aligned */
2991 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2992 MemTxAttrs attrs,
2993 MemTxResult *result,
2994 enum device_endian endian)
2996 uint8_t *ptr;
2997 uint64_t val;
2998 MemoryRegion *mr;
2999 hwaddr l = 8;
3000 hwaddr addr1;
3001 MemTxResult r;
3002 bool release_lock = false;
3004 rcu_read_lock();
3005 mr = address_space_translate(as, addr, &addr1, &l,
3006 false);
3007 if (l < 8 || !memory_access_is_direct(mr, false)) {
3008 release_lock |= prepare_mmio_access(mr);
3010 /* I/O case */
3011 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3012 #if defined(TARGET_WORDS_BIGENDIAN)
3013 if (endian == DEVICE_LITTLE_ENDIAN) {
3014 val = bswap64(val);
3016 #else
3017 if (endian == DEVICE_BIG_ENDIAN) {
3018 val = bswap64(val);
3020 #endif
3021 } else {
3022 /* RAM case */
3023 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3024 & TARGET_PAGE_MASK)
3025 + addr1);
3026 switch (endian) {
3027 case DEVICE_LITTLE_ENDIAN:
3028 val = ldq_le_p(ptr);
3029 break;
3030 case DEVICE_BIG_ENDIAN:
3031 val = ldq_be_p(ptr);
3032 break;
3033 default:
3034 val = ldq_p(ptr);
3035 break;
3037 r = MEMTX_OK;
3039 if (result) {
3040 *result = r;
3042 if (release_lock) {
3043 qemu_mutex_unlock_iothread();
3045 rcu_read_unlock();
3046 return val;
3049 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3050 MemTxAttrs attrs, MemTxResult *result)
3052 return address_space_ldq_internal(as, addr, attrs, result,
3053 DEVICE_NATIVE_ENDIAN);
3056 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3057 MemTxAttrs attrs, MemTxResult *result)
3059 return address_space_ldq_internal(as, addr, attrs, result,
3060 DEVICE_LITTLE_ENDIAN);
3063 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3064 MemTxAttrs attrs, MemTxResult *result)
3066 return address_space_ldq_internal(as, addr, attrs, result,
3067 DEVICE_BIG_ENDIAN);
3070 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3072 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3075 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3077 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3080 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3082 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3085 /* XXX: optimize */
3086 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3087 MemTxAttrs attrs, MemTxResult *result)
3089 uint8_t val;
3090 MemTxResult r;
3092 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3093 if (result) {
3094 *result = r;
3096 return val;
3099 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3101 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3104 /* warning: addr must be aligned */
3105 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3106 hwaddr addr,
3107 MemTxAttrs attrs,
3108 MemTxResult *result,
3109 enum device_endian endian)
3111 uint8_t *ptr;
3112 uint64_t val;
3113 MemoryRegion *mr;
3114 hwaddr l = 2;
3115 hwaddr addr1;
3116 MemTxResult r;
3117 bool release_lock = false;
3119 rcu_read_lock();
3120 mr = address_space_translate(as, addr, &addr1, &l,
3121 false);
3122 if (l < 2 || !memory_access_is_direct(mr, false)) {
3123 release_lock |= prepare_mmio_access(mr);
3125 /* I/O case */
3126 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3127 #if defined(TARGET_WORDS_BIGENDIAN)
3128 if (endian == DEVICE_LITTLE_ENDIAN) {
3129 val = bswap16(val);
3131 #else
3132 if (endian == DEVICE_BIG_ENDIAN) {
3133 val = bswap16(val);
3135 #endif
3136 } else {
3137 /* RAM case */
3138 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3139 & TARGET_PAGE_MASK)
3140 + addr1);
3141 switch (endian) {
3142 case DEVICE_LITTLE_ENDIAN:
3143 val = lduw_le_p(ptr);
3144 break;
3145 case DEVICE_BIG_ENDIAN:
3146 val = lduw_be_p(ptr);
3147 break;
3148 default:
3149 val = lduw_p(ptr);
3150 break;
3152 r = MEMTX_OK;
3154 if (result) {
3155 *result = r;
3157 if (release_lock) {
3158 qemu_mutex_unlock_iothread();
3160 rcu_read_unlock();
3161 return val;
3164 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3165 MemTxAttrs attrs, MemTxResult *result)
3167 return address_space_lduw_internal(as, addr, attrs, result,
3168 DEVICE_NATIVE_ENDIAN);
3171 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3172 MemTxAttrs attrs, MemTxResult *result)
3174 return address_space_lduw_internal(as, addr, attrs, result,
3175 DEVICE_LITTLE_ENDIAN);
3178 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3179 MemTxAttrs attrs, MemTxResult *result)
3181 return address_space_lduw_internal(as, addr, attrs, result,
3182 DEVICE_BIG_ENDIAN);
3185 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3187 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3190 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3192 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3195 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3197 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3200 /* warning: addr must be aligned. The ram page is not masked as dirty
3201 and the code inside is not invalidated. It is useful if the dirty
3202 bits are used to track modified PTEs */
3203 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3204 MemTxAttrs attrs, MemTxResult *result)
3206 uint8_t *ptr;
3207 MemoryRegion *mr;
3208 hwaddr l = 4;
3209 hwaddr addr1;
3210 MemTxResult r;
3211 uint8_t dirty_log_mask;
3212 bool release_lock = false;
3214 rcu_read_lock();
3215 mr = address_space_translate(as, addr, &addr1, &l,
3216 true);
3217 if (l < 4 || !memory_access_is_direct(mr, true)) {
3218 release_lock |= prepare_mmio_access(mr);
3220 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3221 } else {
3222 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3223 ptr = qemu_get_ram_ptr(addr1);
3224 stl_p(ptr, val);
3226 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3227 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3228 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3229 r = MEMTX_OK;
3231 if (result) {
3232 *result = r;
3234 if (release_lock) {
3235 qemu_mutex_unlock_iothread();
3237 rcu_read_unlock();
3240 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3242 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3245 /* warning: addr must be aligned */
3246 static inline void address_space_stl_internal(AddressSpace *as,
3247 hwaddr addr, uint32_t val,
3248 MemTxAttrs attrs,
3249 MemTxResult *result,
3250 enum device_endian endian)
3252 uint8_t *ptr;
3253 MemoryRegion *mr;
3254 hwaddr l = 4;
3255 hwaddr addr1;
3256 MemTxResult r;
3257 bool release_lock = false;
3259 rcu_read_lock();
3260 mr = address_space_translate(as, addr, &addr1, &l,
3261 true);
3262 if (l < 4 || !memory_access_is_direct(mr, true)) {
3263 release_lock |= prepare_mmio_access(mr);
3265 #if defined(TARGET_WORDS_BIGENDIAN)
3266 if (endian == DEVICE_LITTLE_ENDIAN) {
3267 val = bswap32(val);
3269 #else
3270 if (endian == DEVICE_BIG_ENDIAN) {
3271 val = bswap32(val);
3273 #endif
3274 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3275 } else {
3276 /* RAM case */
3277 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3278 ptr = qemu_get_ram_ptr(addr1);
3279 switch (endian) {
3280 case DEVICE_LITTLE_ENDIAN:
3281 stl_le_p(ptr, val);
3282 break;
3283 case DEVICE_BIG_ENDIAN:
3284 stl_be_p(ptr, val);
3285 break;
3286 default:
3287 stl_p(ptr, val);
3288 break;
3290 invalidate_and_set_dirty(mr, addr1, 4);
3291 r = MEMTX_OK;
3293 if (result) {
3294 *result = r;
3296 if (release_lock) {
3297 qemu_mutex_unlock_iothread();
3299 rcu_read_unlock();
3302 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3303 MemTxAttrs attrs, MemTxResult *result)
3305 address_space_stl_internal(as, addr, val, attrs, result,
3306 DEVICE_NATIVE_ENDIAN);
3309 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3310 MemTxAttrs attrs, MemTxResult *result)
3312 address_space_stl_internal(as, addr, val, attrs, result,
3313 DEVICE_LITTLE_ENDIAN);
3316 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3317 MemTxAttrs attrs, MemTxResult *result)
3319 address_space_stl_internal(as, addr, val, attrs, result,
3320 DEVICE_BIG_ENDIAN);
3323 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3325 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3328 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3330 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3333 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3335 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3338 /* XXX: optimize */
3339 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3340 MemTxAttrs attrs, MemTxResult *result)
3342 uint8_t v = val;
3343 MemTxResult r;
3345 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3346 if (result) {
3347 *result = r;
3351 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3353 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3356 /* warning: addr must be aligned */
3357 static inline void address_space_stw_internal(AddressSpace *as,
3358 hwaddr addr, uint32_t val,
3359 MemTxAttrs attrs,
3360 MemTxResult *result,
3361 enum device_endian endian)
3363 uint8_t *ptr;
3364 MemoryRegion *mr;
3365 hwaddr l = 2;
3366 hwaddr addr1;
3367 MemTxResult r;
3368 bool release_lock = false;
3370 rcu_read_lock();
3371 mr = address_space_translate(as, addr, &addr1, &l, true);
3372 if (l < 2 || !memory_access_is_direct(mr, true)) {
3373 release_lock |= prepare_mmio_access(mr);
3375 #if defined(TARGET_WORDS_BIGENDIAN)
3376 if (endian == DEVICE_LITTLE_ENDIAN) {
3377 val = bswap16(val);
3379 #else
3380 if (endian == DEVICE_BIG_ENDIAN) {
3381 val = bswap16(val);
3383 #endif
3384 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3385 } else {
3386 /* RAM case */
3387 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3388 ptr = qemu_get_ram_ptr(addr1);
3389 switch (endian) {
3390 case DEVICE_LITTLE_ENDIAN:
3391 stw_le_p(ptr, val);
3392 break;
3393 case DEVICE_BIG_ENDIAN:
3394 stw_be_p(ptr, val);
3395 break;
3396 default:
3397 stw_p(ptr, val);
3398 break;
3400 invalidate_and_set_dirty(mr, addr1, 2);
3401 r = MEMTX_OK;
3403 if (result) {
3404 *result = r;
3406 if (release_lock) {
3407 qemu_mutex_unlock_iothread();
3409 rcu_read_unlock();
3412 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3413 MemTxAttrs attrs, MemTxResult *result)
3415 address_space_stw_internal(as, addr, val, attrs, result,
3416 DEVICE_NATIVE_ENDIAN);
3419 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3420 MemTxAttrs attrs, MemTxResult *result)
3422 address_space_stw_internal(as, addr, val, attrs, result,
3423 DEVICE_LITTLE_ENDIAN);
3426 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3427 MemTxAttrs attrs, MemTxResult *result)
3429 address_space_stw_internal(as, addr, val, attrs, result,
3430 DEVICE_BIG_ENDIAN);
3433 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3435 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3438 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3440 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3443 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3445 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3448 /* XXX: optimize */
3449 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3450 MemTxAttrs attrs, MemTxResult *result)
3452 MemTxResult r;
3453 val = tswap64(val);
3454 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3455 if (result) {
3456 *result = r;
3460 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3461 MemTxAttrs attrs, MemTxResult *result)
3463 MemTxResult r;
3464 val = cpu_to_le64(val);
3465 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3466 if (result) {
3467 *result = r;
3470 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3471 MemTxAttrs attrs, MemTxResult *result)
3473 MemTxResult r;
3474 val = cpu_to_be64(val);
3475 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3476 if (result) {
3477 *result = r;
3481 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3483 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3486 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3488 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3491 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3493 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3496 /* virtual memory access for debug (includes writing to ROM) */
3497 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3498 uint8_t *buf, int len, int is_write)
3500 int l;
3501 hwaddr phys_addr;
3502 target_ulong page;
3504 while (len > 0) {
3505 page = addr & TARGET_PAGE_MASK;
3506 phys_addr = cpu_get_phys_page_debug(cpu, page);
3507 /* if no physical page mapped, return an error */
3508 if (phys_addr == -1)
3509 return -1;
3510 l = (page + TARGET_PAGE_SIZE) - addr;
3511 if (l > len)
3512 l = len;
3513 phys_addr += (addr & ~TARGET_PAGE_MASK);
3514 if (is_write) {
3515 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3516 } else {
3517 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3518 buf, l, 0);
3520 len -= l;
3521 buf += l;
3522 addr += l;
3524 return 0;
3526 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3542 #ifndef CONFIG_USER_ONLY
3543 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3545 MemoryRegion*mr;
3546 hwaddr l = 1;
3547 bool res;
3549 rcu_read_lock();
3550 mr = address_space_translate(&address_space_memory,
3551 phys_addr, &phys_addr, &l, false);
3553 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3554 rcu_read_unlock();
3555 return res;
3558 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3560 RAMBlock *block;
3561 int ret = 0;
3563 rcu_read_lock();
3564 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3565 ret = func(block->idstr, block->host, block->offset,
3566 block->used_length, opaque);
3567 if (ret) {
3568 break;
3571 rcu_read_unlock();
3572 return ret;
3574 #endif