virtio: introduce virtqueue_discard()
[qemu/kevin.git] / exec.c
blob47ada31040043565b88c89e9e466f2e8ee093816
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
57 #include "qemu/range.h"
59 //#define DEBUG_SUBPAGE
61 #if !defined(CONFIG_USER_ONLY)
62 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
63 * are protected by the ramlist lock.
65 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
67 static MemoryRegion *system_memory;
68 static MemoryRegion *system_io;
70 AddressSpace address_space_io;
71 AddressSpace address_space_memory;
73 MemoryRegion io_mem_rom, io_mem_notdirty;
74 static MemoryRegion io_mem_unassigned;
76 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
77 #define RAM_PREALLOC (1 << 0)
79 /* RAM is mmap-ed with MAP_SHARED */
80 #define RAM_SHARED (1 << 1)
82 /* Only a portion of RAM (used_length) is actually used, and migrated.
83 * This used_length size can change across reboots.
85 #define RAM_RESIZEABLE (1 << 2)
87 #endif
89 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
90 /* current CPU in the current thread. It is only valid inside
91 cpu_exec() */
92 __thread CPUState *current_cpu;
93 /* 0 = Do not count executed instructions.
94 1 = Precise instruction counting.
95 2 = Adaptive rate instruction counting. */
96 int use_icount;
98 #if !defined(CONFIG_USER_ONLY)
100 typedef struct PhysPageEntry PhysPageEntry;
/* One slot of the multi-level physical page map, packed into 32 bits. */
struct PhysPageEntry {
    /* Levels to skip to reach the next node (in units of L2_SIZE);
     * zero marks a leaf entry.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Non-leaf (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
109 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
111 /* Size of the L2 (and L3, etc) page tables. */
112 #define ADDR_SPACE_BITS 64
114 #define P_L2_BITS 9
115 #define P_L2_SIZE (1 << P_L2_BITS)
117 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
119 typedef PhysPageEntry Node[P_L2_SIZE];
121 typedef struct PhysPageMap {
122 struct rcu_head rcu;
124 unsigned sections_nb;
125 unsigned sections_nb_alloc;
126 unsigned nodes_nb;
127 unsigned nodes_nb_alloc;
128 Node *nodes;
129 MemoryRegionSection *sections;
130 } PhysPageMap;
132 struct AddressSpaceDispatch {
133 struct rcu_head rcu;
135 /* This is a multi-level map on the physical address space.
136 * The bottom level has pointers to MemoryRegionSections.
138 PhysPageEntry phys_map;
139 PhysPageMap map;
140 AddressSpace *as;
143 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
144 typedef struct subpage_t {
145 MemoryRegion iomem;
146 AddressSpace *as;
147 hwaddr base;
148 uint16_t sub_section[TARGET_PAGE_SIZE];
149 } subpage_t;
151 #define PHYS_SECTION_UNASSIGNED 0
152 #define PHYS_SECTION_NOTDIRTY 1
153 #define PHYS_SECTION_ROM 2
154 #define PHYS_SECTION_WATCH 3
156 static void io_mem_init(void);
157 static void memory_map_init(void);
158 static void tcg_commit(MemoryListener *listener);
160 static MemoryRegion io_mem_watch;
161 #endif
163 #if !defined(CONFIG_USER_ONLY)
165 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
167 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
168 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
169 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
170 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
174 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
176 unsigned i;
177 uint32_t ret;
178 PhysPageEntry e;
179 PhysPageEntry *p;
181 ret = map->nodes_nb++;
182 p = map->nodes[ret];
183 assert(ret != PHYS_MAP_NODE_NIL);
184 assert(ret != map->nodes_nb_alloc);
186 e.skip = leaf ? 0 : 1;
187 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
188 for (i = 0; i < P_L2_SIZE; ++i) {
189 memcpy(&p[i], &e, sizeof(e));
191 return ret;
194 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
195 hwaddr *index, hwaddr *nb, uint16_t leaf,
196 int level)
198 PhysPageEntry *p;
199 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
201 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
202 lp->ptr = phys_map_node_alloc(map, level == 0);
204 p = map->nodes[lp->ptr];
205 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
207 while (*nb && lp < &p[P_L2_SIZE]) {
208 if ((*index & (step - 1)) == 0 && *nb >= step) {
209 lp->skip = 0;
210 lp->ptr = leaf;
211 *index += step;
212 *nb -= step;
213 } else {
214 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
216 ++lp;
220 static void phys_page_set(AddressSpaceDispatch *d,
221 hwaddr index, hwaddr nb,
222 uint16_t leaf)
224 /* Wildly overreserve - it doesn't matter much. */
225 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
227 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
230 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
231 * and update our entry so we can skip it and go directly to the destination.
233 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
235 unsigned valid_ptr = P_L2_SIZE;
236 int valid = 0;
237 PhysPageEntry *p;
238 int i;
240 if (lp->ptr == PHYS_MAP_NODE_NIL) {
241 return;
244 p = nodes[lp->ptr];
245 for (i = 0; i < P_L2_SIZE; i++) {
246 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
247 continue;
250 valid_ptr = i;
251 valid++;
252 if (p[i].skip) {
253 phys_page_compact(&p[i], nodes, compacted);
257 /* We can only compress if there's only one child. */
258 if (valid != 1) {
259 return;
262 assert(valid_ptr < P_L2_SIZE);
264 /* Don't compress if it won't fit in the # of bits we have. */
265 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
266 return;
269 lp->ptr = p[valid_ptr].ptr;
270 if (!p[valid_ptr].skip) {
271 /* If our only child is a leaf, make this a leaf. */
272 /* By design, we should have made this node a leaf to begin with so we
273 * should never reach here.
274 * But since it's so simple to handle this, let's do it just in case we
275 * change this rule.
277 lp->skip = 0;
278 } else {
279 lp->skip += p[valid_ptr].skip;
283 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
285 DECLARE_BITMAP(compacted, nodes_nb);
287 if (d->phys_map.skip) {
288 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
292 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
293 Node *nodes, MemoryRegionSection *sections)
295 PhysPageEntry *p;
296 hwaddr index = addr >> TARGET_PAGE_BITS;
297 int i;
299 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
300 if (lp.ptr == PHYS_MAP_NODE_NIL) {
301 return &sections[PHYS_SECTION_UNASSIGNED];
303 p = nodes[lp.ptr];
304 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
307 if (sections[lp.ptr].size.hi ||
308 range_covers_byte(sections[lp.ptr].offset_within_address_space,
309 sections[lp.ptr].size.lo, addr)) {
310 return &sections[lp.ptr];
311 } else {
312 return &sections[PHYS_SECTION_UNASSIGNED];
316 bool memory_region_is_unassigned(MemoryRegion *mr)
318 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
319 && mr != &io_mem_watch;
322 /* Called from RCU critical section */
323 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
324 hwaddr addr,
325 bool resolve_subpage)
327 MemoryRegionSection *section;
328 subpage_t *subpage;
330 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
331 if (resolve_subpage && section->mr->subpage) {
332 subpage = container_of(section->mr, subpage_t, iomem);
333 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
335 return section;
338 /* Called from RCU critical section */
339 static MemoryRegionSection *
340 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
341 hwaddr *plen, bool resolve_subpage)
343 MemoryRegionSection *section;
344 MemoryRegion *mr;
345 Int128 diff;
347 section = address_space_lookup_region(d, addr, resolve_subpage);
348 /* Compute offset within MemoryRegionSection */
349 addr -= section->offset_within_address_space;
351 /* Compute offset within MemoryRegion */
352 *xlat = addr + section->offset_within_region;
354 mr = section->mr;
356 /* MMIO registers can be expected to perform full-width accesses based only
357 * on their address, without considering adjacent registers that could
358 * decode to completely different MemoryRegions. When such registers
359 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
360 * regions overlap wildly. For this reason we cannot clamp the accesses
361 * here.
363 * If the length is small (as is the case for address_space_ldl/stl),
364 * everything works fine. If the incoming length is large, however,
365 * the caller really has to do the clamping through memory_access_size.
367 if (memory_region_is_ram(mr)) {
368 diff = int128_sub(section->size, int128_make64(addr));
369 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
371 return section;
374 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
376 if (memory_region_is_ram(mr)) {
377 return !(is_write && mr->readonly);
379 if (memory_region_is_romd(mr)) {
380 return !is_write;
383 return false;
386 /* Called from RCU critical section */
387 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
388 hwaddr *xlat, hwaddr *plen,
389 bool is_write)
391 IOMMUTLBEntry iotlb;
392 MemoryRegionSection *section;
393 MemoryRegion *mr;
395 for (;;) {
396 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
397 section = address_space_translate_internal(d, addr, &addr, plen, true);
398 mr = section->mr;
400 if (!mr->iommu_ops) {
401 break;
404 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
405 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
406 | (addr & iotlb.addr_mask));
407 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
408 if (!(iotlb.perm & (1 << is_write))) {
409 mr = &io_mem_unassigned;
410 break;
413 as = iotlb.target_as;
416 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
417 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
418 *plen = MIN(page, *plen);
421 *xlat = addr;
422 return mr;
425 /* Called from RCU critical section */
426 MemoryRegionSection *
427 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
428 hwaddr *xlat, hwaddr *plen)
430 MemoryRegionSection *section;
431 section = address_space_translate_internal(cpu->memory_dispatch,
432 addr, xlat, plen, false);
434 assert(!section->mr->iommu_ops);
435 return section;
437 #endif
439 #if !defined(CONFIG_USER_ONLY)
441 static int cpu_common_post_load(void *opaque, int version_id)
443 CPUState *cpu = opaque;
445 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
446 version_id is increased. */
447 cpu->interrupt_request &= ~0x01;
448 tlb_flush(cpu, 1);
450 return 0;
453 static int cpu_common_pre_load(void *opaque)
455 CPUState *cpu = opaque;
457 cpu->exception_index = -1;
459 return 0;
462 static bool cpu_common_exception_index_needed(void *opaque)
464 CPUState *cpu = opaque;
466 return tcg_enabled() && cpu->exception_index != -1;
469 static const VMStateDescription vmstate_cpu_common_exception_index = {
470 .name = "cpu_common/exception_index",
471 .version_id = 1,
472 .minimum_version_id = 1,
473 .needed = cpu_common_exception_index_needed,
474 .fields = (VMStateField[]) {
475 VMSTATE_INT32(exception_index, CPUState),
476 VMSTATE_END_OF_LIST()
480 static bool cpu_common_crash_occurred_needed(void *opaque)
482 CPUState *cpu = opaque;
484 return cpu->crash_occurred;
487 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
488 .name = "cpu_common/crash_occurred",
489 .version_id = 1,
490 .minimum_version_id = 1,
491 .needed = cpu_common_crash_occurred_needed,
492 .fields = (VMStateField[]) {
493 VMSTATE_BOOL(crash_occurred, CPUState),
494 VMSTATE_END_OF_LIST()
498 const VMStateDescription vmstate_cpu_common = {
499 .name = "cpu_common",
500 .version_id = 1,
501 .minimum_version_id = 1,
502 .pre_load = cpu_common_pre_load,
503 .post_load = cpu_common_post_load,
504 .fields = (VMStateField[]) {
505 VMSTATE_UINT32(halted, CPUState),
506 VMSTATE_UINT32(interrupt_request, CPUState),
507 VMSTATE_END_OF_LIST()
509 .subsections = (const VMStateDescription*[]) {
510 &vmstate_cpu_common_exception_index,
511 &vmstate_cpu_common_crash_occurred,
512 NULL
516 #endif
518 CPUState *qemu_get_cpu(int index)
520 CPUState *cpu;
522 CPU_FOREACH(cpu) {
523 if (cpu->cpu_index == index) {
524 return cpu;
528 return NULL;
531 #if !defined(CONFIG_USER_ONLY)
532 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
534 /* We only support one address space per cpu at the moment. */
535 assert(cpu->as == as);
537 if (cpu->tcg_as_listener) {
538 memory_listener_unregister(cpu->tcg_as_listener);
539 } else {
540 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
542 cpu->tcg_as_listener->commit = tcg_commit;
543 memory_listener_register(cpu->tcg_as_listener, as);
545 #endif
547 #ifndef CONFIG_USER_ONLY
548 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
550 static int cpu_get_free_index(Error **errp)
552 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
554 if (cpu >= MAX_CPUMASK_BITS) {
555 error_setg(errp, "Trying to use more CPUs than max of %d",
556 MAX_CPUMASK_BITS);
557 return -1;
560 bitmap_set(cpu_index_map, cpu, 1);
561 return cpu;
564 void cpu_exec_exit(CPUState *cpu)
566 if (cpu->cpu_index == -1) {
567 /* cpu_index was never allocated by this @cpu or was already freed. */
568 return;
571 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
572 cpu->cpu_index = -1;
574 #else
576 static int cpu_get_free_index(Error **errp)
578 CPUState *some_cpu;
579 int cpu_index = 0;
581 CPU_FOREACH(some_cpu) {
582 cpu_index++;
584 return cpu_index;
587 void cpu_exec_exit(CPUState *cpu)
590 #endif
592 void cpu_exec_init(CPUState *cpu, Error **errp)
594 CPUClass *cc = CPU_GET_CLASS(cpu);
595 int cpu_index;
596 Error *local_err = NULL;
598 #ifndef CONFIG_USER_ONLY
599 cpu->as = &address_space_memory;
600 cpu->thread_id = qemu_get_thread_id();
601 cpu_reload_memory_map(cpu);
602 #endif
604 #if defined(CONFIG_USER_ONLY)
605 cpu_list_lock();
606 #endif
607 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
608 if (local_err) {
609 error_propagate(errp, local_err);
610 #if defined(CONFIG_USER_ONLY)
611 cpu_list_unlock();
612 #endif
613 return;
615 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
616 #if defined(CONFIG_USER_ONLY)
617 cpu_list_unlock();
618 #endif
619 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
620 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
622 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
623 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
624 cpu_save, cpu_load, cpu->env_ptr);
625 assert(cc->vmsd == NULL);
626 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
627 #endif
628 if (cc->vmsd != NULL) {
629 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
633 #if defined(CONFIG_USER_ONLY)
634 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
636 tb_invalidate_phys_page_range(pc, pc + 1, 0);
638 #else
639 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
641 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
642 if (phys != -1) {
643 tb_invalidate_phys_addr(cpu->as,
644 phys | (pc & ~TARGET_PAGE_MASK));
647 #endif
649 #if defined(CONFIG_USER_ONLY)
650 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
655 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
656 int flags)
658 return -ENOSYS;
661 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
665 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
666 int flags, CPUWatchpoint **watchpoint)
668 return -ENOSYS;
670 #else
671 /* Add a watchpoint. */
672 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
673 int flags, CPUWatchpoint **watchpoint)
675 CPUWatchpoint *wp;
677 /* forbid ranges which are empty or run off the end of the address space */
678 if (len == 0 || (addr + len - 1) < addr) {
679 error_report("tried to set invalid watchpoint at %"
680 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
681 return -EINVAL;
683 wp = g_malloc(sizeof(*wp));
685 wp->vaddr = addr;
686 wp->len = len;
687 wp->flags = flags;
689 /* keep all GDB-injected watchpoints in front */
690 if (flags & BP_GDB) {
691 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
692 } else {
693 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
696 tlb_flush_page(cpu, addr);
698 if (watchpoint)
699 *watchpoint = wp;
700 return 0;
703 /* Remove a specific watchpoint. */
704 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
705 int flags)
707 CPUWatchpoint *wp;
709 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
710 if (addr == wp->vaddr && len == wp->len
711 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
712 cpu_watchpoint_remove_by_ref(cpu, wp);
713 return 0;
716 return -ENOENT;
719 /* Remove a specific watchpoint by reference. */
720 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
722 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
724 tlb_flush_page(cpu, watchpoint->vaddr);
726 g_free(watchpoint);
729 /* Remove all matching watchpoints. */
730 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
732 CPUWatchpoint *wp, *next;
734 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
735 if (wp->flags & mask) {
736 cpu_watchpoint_remove_by_ref(cpu, wp);
741 /* Return true if this watchpoint address matches the specified
742 * access (ie the address range covered by the watchpoint overlaps
743 * partially or completely with the address range covered by the
744 * access).
746 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
747 vaddr addr,
748 vaddr len)
750 /* We know the lengths are non-zero, but a little caution is
751 * required to avoid errors in the case where the range ends
752 * exactly at the top of the address space and so addr + len
753 * wraps round to zero.
755 vaddr wpend = wp->vaddr + wp->len - 1;
756 vaddr addrend = addr + len - 1;
758 return !(addr > wpend || wp->vaddr > addrend);
761 #endif
763 /* Add a breakpoint. */
764 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
765 CPUBreakpoint **breakpoint)
767 CPUBreakpoint *bp;
769 bp = g_malloc(sizeof(*bp));
771 bp->pc = pc;
772 bp->flags = flags;
774 /* keep all GDB-injected breakpoints in front */
775 if (flags & BP_GDB) {
776 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
777 } else {
778 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
781 breakpoint_invalidate(cpu, pc);
783 if (breakpoint) {
784 *breakpoint = bp;
786 return 0;
789 /* Remove a specific breakpoint. */
790 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
792 CPUBreakpoint *bp;
794 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
795 if (bp->pc == pc && bp->flags == flags) {
796 cpu_breakpoint_remove_by_ref(cpu, bp);
797 return 0;
800 return -ENOENT;
803 /* Remove a specific breakpoint by reference. */
804 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
806 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
808 breakpoint_invalidate(cpu, breakpoint->pc);
810 g_free(breakpoint);
813 /* Remove all matching breakpoints. */
814 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
816 CPUBreakpoint *bp, *next;
818 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
819 if (bp->flags & mask) {
820 cpu_breakpoint_remove_by_ref(cpu, bp);
825 /* enable or disable single step mode. EXCP_DEBUG is returned by the
826 CPU loop after each instruction */
827 void cpu_single_step(CPUState *cpu, int enabled)
829 if (cpu->singlestep_enabled != enabled) {
830 cpu->singlestep_enabled = enabled;
831 if (kvm_enabled()) {
832 kvm_update_guest_debug(cpu, 0);
833 } else {
834 /* must flush all the translated code to avoid inconsistencies */
835 /* XXX: only flush what is necessary */
836 tb_flush(cpu);
841 void cpu_abort(CPUState *cpu, const char *fmt, ...)
843 va_list ap;
844 va_list ap2;
846 va_start(ap, fmt);
847 va_copy(ap2, ap);
848 fprintf(stderr, "qemu: fatal: ");
849 vfprintf(stderr, fmt, ap);
850 fprintf(stderr, "\n");
851 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
852 if (qemu_log_enabled()) {
853 qemu_log("qemu: fatal: ");
854 qemu_log_vprintf(fmt, ap2);
855 qemu_log("\n");
856 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
857 qemu_log_flush();
858 qemu_log_close();
860 va_end(ap2);
861 va_end(ap);
862 #if defined(CONFIG_USER_ONLY)
864 struct sigaction act;
865 sigfillset(&act.sa_mask);
866 act.sa_handler = SIG_DFL;
867 sigaction(SIGABRT, &act, NULL);
869 #endif
870 abort();
873 #if !defined(CONFIG_USER_ONLY)
874 /* Called from RCU critical section */
875 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
877 RAMBlock *block;
879 block = atomic_rcu_read(&ram_list.mru_block);
880 if (block && addr - block->offset < block->max_length) {
881 goto found;
883 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
884 if (addr - block->offset < block->max_length) {
885 goto found;
889 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
890 abort();
892 found:
893 /* It is safe to write mru_block outside the iothread lock. This
894 * is what happens:
896 * mru_block = xxx
897 * rcu_read_unlock()
898 * xxx removed from list
899 * rcu_read_lock()
900 * read mru_block
901 * mru_block = NULL;
902 * call_rcu(reclaim_ramblock, xxx);
903 * rcu_read_unlock()
905 * atomic_rcu_set is not needed here. The block was already published
906 * when it was placed into the list. Here we're just making an extra
907 * copy of the pointer.
909 ram_list.mru_block = block;
910 return block;
913 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
915 CPUState *cpu;
916 ram_addr_t start1;
917 RAMBlock *block;
918 ram_addr_t end;
920 end = TARGET_PAGE_ALIGN(start + length);
921 start &= TARGET_PAGE_MASK;
923 rcu_read_lock();
924 block = qemu_get_ram_block(start);
925 assert(block == qemu_get_ram_block(end - 1));
926 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
927 CPU_FOREACH(cpu) {
928 tlb_reset_dirty(cpu, start1, length);
930 rcu_read_unlock();
933 /* Note: start and end must be within the same ram block. */
934 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
935 ram_addr_t length,
936 unsigned client)
938 unsigned long end, page;
939 bool dirty;
941 if (length == 0) {
942 return false;
945 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
946 page = start >> TARGET_PAGE_BITS;
947 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
948 page, end - page);
950 if (dirty && tcg_enabled()) {
951 tlb_reset_dirty_range_all(start, length);
954 return dirty;
957 /* Called from RCU critical section */
958 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
959 MemoryRegionSection *section,
960 target_ulong vaddr,
961 hwaddr paddr, hwaddr xlat,
962 int prot,
963 target_ulong *address)
965 hwaddr iotlb;
966 CPUWatchpoint *wp;
968 if (memory_region_is_ram(section->mr)) {
969 /* Normal RAM. */
970 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
971 + xlat;
972 if (!section->readonly) {
973 iotlb |= PHYS_SECTION_NOTDIRTY;
974 } else {
975 iotlb |= PHYS_SECTION_ROM;
977 } else {
978 AddressSpaceDispatch *d;
980 d = atomic_rcu_read(&section->address_space->dispatch);
981 iotlb = section - d->map.sections;
982 iotlb += xlat;
985 /* Make accesses to pages with watchpoints go via the
986 watchpoint trap routines. */
987 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
988 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
989 /* Avoid trapping reads of pages with a write breakpoint. */
990 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
991 iotlb = PHYS_SECTION_WATCH + paddr;
992 *address |= TLB_MMIO;
993 break;
998 return iotlb;
1000 #endif /* !defined(CONFIG_USER_ONLY) */
1002 #if !defined(CONFIG_USER_ONLY)
1004 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1005 uint16_t section);
1006 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1008 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1009 qemu_anon_ram_alloc;
1012 * Set a custom physical guest memory alloator.
1013 * Accelerators with unusual needs may need this. Hopefully, we can
1014 * get rid of it eventually.
1016 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1018 phys_mem_alloc = alloc;
1021 static uint16_t phys_section_add(PhysPageMap *map,
1022 MemoryRegionSection *section)
1024 /* The physical section number is ORed with a page-aligned
1025 * pointer to produce the iotlb entries. Thus it should
1026 * never overflow into the page-aligned value.
1028 assert(map->sections_nb < TARGET_PAGE_SIZE);
1030 if (map->sections_nb == map->sections_nb_alloc) {
1031 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1032 map->sections = g_renew(MemoryRegionSection, map->sections,
1033 map->sections_nb_alloc);
1035 map->sections[map->sections_nb] = *section;
1036 memory_region_ref(section->mr);
1037 return map->sections_nb++;
1040 static void phys_section_destroy(MemoryRegion *mr)
1042 memory_region_unref(mr);
1044 if (mr->subpage) {
1045 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1046 object_unref(OBJECT(&subpage->iomem));
1047 g_free(subpage);
1051 static void phys_sections_free(PhysPageMap *map)
1053 while (map->sections_nb > 0) {
1054 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1055 phys_section_destroy(section->mr);
1057 g_free(map->sections);
1058 g_free(map->nodes);
1061 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1063 subpage_t *subpage;
1064 hwaddr base = section->offset_within_address_space
1065 & TARGET_PAGE_MASK;
1066 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1067 d->map.nodes, d->map.sections);
1068 MemoryRegionSection subsection = {
1069 .offset_within_address_space = base,
1070 .size = int128_make64(TARGET_PAGE_SIZE),
1072 hwaddr start, end;
1074 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1076 if (!(existing->mr->subpage)) {
1077 subpage = subpage_init(d->as, base);
1078 subsection.address_space = d->as;
1079 subsection.mr = &subpage->iomem;
1080 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1081 phys_section_add(&d->map, &subsection));
1082 } else {
1083 subpage = container_of(existing->mr, subpage_t, iomem);
1085 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1086 end = start + int128_get64(section->size) - 1;
1087 subpage_register(subpage, start, end,
1088 phys_section_add(&d->map, section));
1092 static void register_multipage(AddressSpaceDispatch *d,
1093 MemoryRegionSection *section)
1095 hwaddr start_addr = section->offset_within_address_space;
1096 uint16_t section_index = phys_section_add(&d->map, section);
1097 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1098 TARGET_PAGE_BITS));
1100 assert(num_pages);
1101 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1104 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1106 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1107 AddressSpaceDispatch *d = as->next_dispatch;
1108 MemoryRegionSection now = *section, remain = *section;
1109 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1111 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1112 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1113 - now.offset_within_address_space;
1115 now.size = int128_min(int128_make64(left), now.size);
1116 register_subpage(d, &now);
1117 } else {
1118 now.size = int128_zero();
1120 while (int128_ne(remain.size, now.size)) {
1121 remain.size = int128_sub(remain.size, now.size);
1122 remain.offset_within_address_space += int128_get64(now.size);
1123 remain.offset_within_region += int128_get64(now.size);
1124 now = remain;
1125 if (int128_lt(remain.size, page_size)) {
1126 register_subpage(d, &now);
1127 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1128 now.size = page_size;
1129 register_subpage(d, &now);
1130 } else {
1131 now.size = int128_and(now.size, int128_neg(page_size));
1132 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; a no-op when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1143 void qemu_mutex_lock_ramlist(void)
1145 qemu_mutex_lock(&ram_list.mutex);
1148 void qemu_mutex_unlock_ramlist(void)
1150 qemu_mutex_unlock(&ram_list.mutex);
1153 #ifdef __linux__
1155 #include <sys/vfs.h>
1157 #define HUGETLBFS_MAGIC 0x958458f6
1159 static long gethugepagesize(const char *path, Error **errp)
1161 struct statfs fs;
1162 int ret;
1164 do {
1165 ret = statfs(path, &fs);
1166 } while (ret != 0 && errno == EINTR);
1168 if (ret != 0) {
1169 error_setg_errno(errp, errno, "failed to get page size of file %s",
1170 path);
1171 return 0;
1174 if (fs.f_type != HUGETLBFS_MAGIC)
1175 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1177 return fs.f_bsize;
1180 static void *file_ram_alloc(RAMBlock *block,
1181 ram_addr_t memory,
1182 const char *path,
1183 Error **errp)
1185 char *filename;
1186 char *sanitized_name;
1187 char *c;
1188 void *area = NULL;
1189 int fd;
1190 uint64_t hpagesize;
1191 Error *local_err = NULL;
1193 hpagesize = gethugepagesize(path, &local_err);
1194 if (local_err) {
1195 error_propagate(errp, local_err);
1196 goto error;
1198 block->mr->align = hpagesize;
1200 if (memory < hpagesize) {
1201 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1202 "or larger than huge page size 0x%" PRIx64,
1203 memory, hpagesize);
1204 goto error;
1207 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1208 error_setg(errp,
1209 "host lacks kvm mmu notifiers, -mem-path unsupported");
1210 goto error;
1213 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1214 sanitized_name = g_strdup(memory_region_name(block->mr));
1215 for (c = sanitized_name; *c != '\0'; c++) {
1216 if (*c == '/')
1217 *c = '_';
1220 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1221 sanitized_name);
1222 g_free(sanitized_name);
1224 fd = mkstemp(filename);
1225 if (fd < 0) {
1226 error_setg_errno(errp, errno,
1227 "unable to create backing store for hugepages");
1228 g_free(filename);
1229 goto error;
1231 unlink(filename);
1232 g_free(filename);
1234 memory = ROUND_UP(memory, hpagesize);
1237 * ftruncate is not supported by hugetlbfs in older
1238 * hosts, so don't bother bailing out on errors.
1239 * If anything goes wrong with it under other filesystems,
1240 * mmap will fail.
1242 if (ftruncate(fd, memory)) {
1243 perror("ftruncate");
1246 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1247 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1248 fd, 0);
1249 if (area == MAP_FAILED) {
1250 error_setg_errno(errp, errno,
1251 "unable to map backing store for hugepages");
1252 close(fd);
1253 goto error;
1256 if (mem_prealloc) {
1257 os_mem_prealloc(fd, area, memory);
1260 block->fd = fd;
1261 return area;
1263 error:
1264 if (mem_prealloc) {
1265 error_report("%s", error_get_pretty(*errp));
1266 exit(1);
1268 return NULL;
1270 #endif
1272 /* Called with the ramlist lock held. */
1273 static ram_addr_t find_ram_offset(ram_addr_t size)
1275 RAMBlock *block, *next_block;
1276 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1278 assert(size != 0); /* it would hand out same offset multiple times */
1280 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1281 return 0;
1284 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1285 ram_addr_t end, next = RAM_ADDR_MAX;
1287 end = block->offset + block->max_length;
1289 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1290 if (next_block->offset >= end) {
1291 next = MIN(next, next_block->offset);
1294 if (next - end >= size && next - end < mingap) {
1295 offset = end;
1296 mingap = next - end;
1300 if (offset == RAM_ADDR_MAX) {
1301 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1302 (uint64_t)size);
1303 abort();
1306 return offset;
1309 ram_addr_t last_ram_offset(void)
1311 RAMBlock *block;
1312 ram_addr_t last = 0;
1314 rcu_read_lock();
1315 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1316 last = MAX(last, block->offset + block->max_length);
1318 rcu_read_unlock();
1319 return last;
1322 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1324 int ret;
1326 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1327 if (!machine_dump_guest_core(current_machine)) {
1328 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1329 if (ret) {
1330 perror("qemu_madvise");
1331 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1332 "but dump_guest_core=off specified\n");
1337 /* Called within an RCU critical section, or while the ramlist lock
1338 * is held.
1340 static RAMBlock *find_ram_block(ram_addr_t addr)
1342 RAMBlock *block;
1344 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1345 if (block->offset == addr) {
1346 return block;
1350 return NULL;
1353 /* Called with iothread lock held. */
1354 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1356 RAMBlock *new_block, *block;
1358 rcu_read_lock();
1359 new_block = find_ram_block(addr);
1360 assert(new_block);
1361 assert(!new_block->idstr[0]);
1363 if (dev) {
1364 char *id = qdev_get_dev_path(dev);
1365 if (id) {
1366 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1367 g_free(id);
1370 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1372 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1373 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1374 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1375 new_block->idstr);
1376 abort();
1379 rcu_read_unlock();
1382 /* Called with iothread lock held. */
1383 void qemu_ram_unset_idstr(ram_addr_t addr)
1385 RAMBlock *block;
1387 /* FIXME: arch_init.c assumes that this is not called throughout
1388 * migration. Ignore the problem since hot-unplug during migration
1389 * does not work anyway.
1392 rcu_read_lock();
1393 block = find_ram_block(addr);
1394 if (block) {
1395 memset(block->idstr, 0, sizeof(block->idstr));
1397 rcu_read_unlock();
1400 static int memory_try_enable_merging(void *addr, size_t len)
1402 if (!machine_mem_merge(current_machine)) {
1403 /* disabled by the user */
1404 return 0;
1407 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1410 /* Only legal before guest might have detected the memory size: e.g. on
1411 * incoming migration, or right after reset.
1413 * As memory core doesn't know how is memory accessed, it is up to
1414 * resize callback to update device state and/or add assertions to detect
1415 * misuse, if necessary.
1417 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1419 RAMBlock *block = find_ram_block(base);
1421 assert(block);
1423 newsize = TARGET_PAGE_ALIGN(newsize);
1425 if (block->used_length == newsize) {
1426 return 0;
1429 if (!(block->flags & RAM_RESIZEABLE)) {
1430 error_setg_errno(errp, EINVAL,
1431 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1432 " in != 0x" RAM_ADDR_FMT, block->idstr,
1433 newsize, block->used_length);
1434 return -EINVAL;
1437 if (block->max_length < newsize) {
1438 error_setg_errno(errp, EINVAL,
1439 "Length too large: %s: 0x" RAM_ADDR_FMT
1440 " > 0x" RAM_ADDR_FMT, block->idstr,
1441 newsize, block->max_length);
1442 return -EINVAL;
1445 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1446 block->used_length = newsize;
1447 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1448 DIRTY_CLIENTS_ALL);
1449 memory_region_set_size(block->mr, newsize);
1450 if (block->resized) {
1451 block->resized(block->idstr, newsize, block->host);
1453 return 0;
1456 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1458 RAMBlock *block;
1459 RAMBlock *last_block = NULL;
1460 ram_addr_t old_ram_size, new_ram_size;
1462 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1464 qemu_mutex_lock_ramlist();
1465 new_block->offset = find_ram_offset(new_block->max_length);
1467 if (!new_block->host) {
1468 if (xen_enabled()) {
1469 xen_ram_alloc(new_block->offset, new_block->max_length,
1470 new_block->mr);
1471 } else {
1472 new_block->host = phys_mem_alloc(new_block->max_length,
1473 &new_block->mr->align);
1474 if (!new_block->host) {
1475 error_setg_errno(errp, errno,
1476 "cannot set up guest memory '%s'",
1477 memory_region_name(new_block->mr));
1478 qemu_mutex_unlock_ramlist();
1479 return -1;
1481 memory_try_enable_merging(new_block->host, new_block->max_length);
1485 new_ram_size = MAX(old_ram_size,
1486 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1487 if (new_ram_size > old_ram_size) {
1488 migration_bitmap_extend(old_ram_size, new_ram_size);
1490 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1491 * QLIST (which has an RCU-friendly variant) does not have insertion at
1492 * tail, so save the last element in last_block.
1494 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1495 last_block = block;
1496 if (block->max_length < new_block->max_length) {
1497 break;
1500 if (block) {
1501 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1502 } else if (last_block) {
1503 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1504 } else { /* list is empty */
1505 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1507 ram_list.mru_block = NULL;
1509 /* Write list before version */
1510 smp_wmb();
1511 ram_list.version++;
1512 qemu_mutex_unlock_ramlist();
1514 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1516 if (new_ram_size > old_ram_size) {
1517 int i;
1519 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1520 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1521 ram_list.dirty_memory[i] =
1522 bitmap_zero_extend(ram_list.dirty_memory[i],
1523 old_ram_size, new_ram_size);
1526 cpu_physical_memory_set_dirty_range(new_block->offset,
1527 new_block->used_length,
1528 DIRTY_CLIENTS_ALL);
1530 if (new_block->host) {
1531 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1532 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1533 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1534 if (kvm_enabled()) {
1535 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1539 return new_block->offset;
1542 #ifdef __linux__
1543 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1544 bool share, const char *mem_path,
1545 Error **errp)
1547 RAMBlock *new_block;
1548 ram_addr_t addr;
1549 Error *local_err = NULL;
1551 if (xen_enabled()) {
1552 error_setg(errp, "-mem-path not supported with Xen");
1553 return -1;
1556 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1558 * file_ram_alloc() needs to allocate just like
1559 * phys_mem_alloc, but we haven't bothered to provide
1560 * a hook there.
1562 error_setg(errp,
1563 "-mem-path not supported with this accelerator");
1564 return -1;
1567 size = TARGET_PAGE_ALIGN(size);
1568 new_block = g_malloc0(sizeof(*new_block));
1569 new_block->mr = mr;
1570 new_block->used_length = size;
1571 new_block->max_length = size;
1572 new_block->flags = share ? RAM_SHARED : 0;
1573 new_block->host = file_ram_alloc(new_block, size,
1574 mem_path, errp);
1575 if (!new_block->host) {
1576 g_free(new_block);
1577 return -1;
1580 addr = ram_block_add(new_block, &local_err);
1581 if (local_err) {
1582 g_free(new_block);
1583 error_propagate(errp, local_err);
1584 return -1;
1586 return addr;
1588 #endif
1590 static
1591 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1592 void (*resized)(const char*,
1593 uint64_t length,
1594 void *host),
1595 void *host, bool resizeable,
1596 MemoryRegion *mr, Error **errp)
1598 RAMBlock *new_block;
1599 ram_addr_t addr;
1600 Error *local_err = NULL;
1602 size = TARGET_PAGE_ALIGN(size);
1603 max_size = TARGET_PAGE_ALIGN(max_size);
1604 new_block = g_malloc0(sizeof(*new_block));
1605 new_block->mr = mr;
1606 new_block->resized = resized;
1607 new_block->used_length = size;
1608 new_block->max_length = max_size;
1609 assert(max_size >= size);
1610 new_block->fd = -1;
1611 new_block->host = host;
1612 if (host) {
1613 new_block->flags |= RAM_PREALLOC;
1615 if (resizeable) {
1616 new_block->flags |= RAM_RESIZEABLE;
1618 addr = ram_block_add(new_block, &local_err);
1619 if (local_err) {
1620 g_free(new_block);
1621 error_propagate(errp, local_err);
1622 return -1;
1624 return addr;
1627 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1628 MemoryRegion *mr, Error **errp)
1630 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1633 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1635 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1638 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1639 void (*resized)(const char*,
1640 uint64_t length,
1641 void *host),
1642 MemoryRegion *mr, Error **errp)
1644 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1647 void qemu_ram_free_from_ptr(ram_addr_t addr)
1649 RAMBlock *block;
1651 qemu_mutex_lock_ramlist();
1652 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1653 if (addr == block->offset) {
1654 QLIST_REMOVE_RCU(block, next);
1655 ram_list.mru_block = NULL;
1656 /* Write list before version */
1657 smp_wmb();
1658 ram_list.version++;
1659 g_free_rcu(block, rcu);
1660 break;
1663 qemu_mutex_unlock_ramlist();
1666 static void reclaim_ramblock(RAMBlock *block)
1668 if (block->flags & RAM_PREALLOC) {
1670 } else if (xen_enabled()) {
1671 xen_invalidate_map_cache_entry(block->host);
1672 #ifndef _WIN32
1673 } else if (block->fd >= 0) {
1674 munmap(block->host, block->max_length);
1675 close(block->fd);
1676 #endif
1677 } else {
1678 qemu_anon_ram_free(block->host, block->max_length);
1680 g_free(block);
1683 void qemu_ram_free(ram_addr_t addr)
1685 RAMBlock *block;
1687 qemu_mutex_lock_ramlist();
1688 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1689 if (addr == block->offset) {
1690 QLIST_REMOVE_RCU(block, next);
1691 ram_list.mru_block = NULL;
1692 /* Write list before version */
1693 smp_wmb();
1694 ram_list.version++;
1695 call_rcu(block, reclaim_ramblock, rcu);
1696 break;
1699 qemu_mutex_unlock_ramlist();
1702 #ifndef _WIN32
1703 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1705 RAMBlock *block;
1706 ram_addr_t offset;
1707 int flags;
1708 void *area, *vaddr;
1710 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1711 offset = addr - block->offset;
1712 if (offset < block->max_length) {
1713 vaddr = ramblock_ptr(block, offset);
1714 if (block->flags & RAM_PREALLOC) {
1716 } else if (xen_enabled()) {
1717 abort();
1718 } else {
1719 flags = MAP_FIXED;
1720 if (block->fd >= 0) {
1721 flags |= (block->flags & RAM_SHARED ?
1722 MAP_SHARED : MAP_PRIVATE);
1723 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1724 flags, block->fd, offset);
1725 } else {
1727 * Remap needs to match alloc. Accelerators that
1728 * set phys_mem_alloc never remap. If they did,
1729 * we'd need a remap hook here.
1731 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1733 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1734 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1735 flags, -1, 0);
1737 if (area != vaddr) {
1738 fprintf(stderr, "Could not remap addr: "
1739 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1740 length, addr);
1741 exit(1);
1743 memory_try_enable_merging(vaddr, length);
1744 qemu_ram_setup_dump(vaddr, length);
1749 #endif /* !_WIN32 */
1751 int qemu_get_ram_fd(ram_addr_t addr)
1753 RAMBlock *block;
1754 int fd;
1756 rcu_read_lock();
1757 block = qemu_get_ram_block(addr);
1758 fd = block->fd;
1759 rcu_read_unlock();
1760 return fd;
1763 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1765 RAMBlock *block;
1766 void *ptr;
1768 rcu_read_lock();
1769 block = qemu_get_ram_block(addr);
1770 ptr = ramblock_ptr(block, 0);
1771 rcu_read_unlock();
1772 return ptr;
1775 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1776 * This should not be used for general purpose DMA. Use address_space_map
1777 * or address_space_rw instead. For local memory (e.g. video ram) that the
1778 * device owns, use memory_region_get_ram_ptr.
1780 * By the time this function returns, the returned pointer is not protected
1781 * by RCU anymore. If the caller is not within an RCU critical section and
1782 * does not hold the iothread lock, it must have other means of protecting the
1783 * pointer, such as a reference to the region that includes the incoming
1784 * ram_addr_t.
1786 void *qemu_get_ram_ptr(ram_addr_t addr)
1788 RAMBlock *block;
1789 void *ptr;
1791 rcu_read_lock();
1792 block = qemu_get_ram_block(addr);
1794 if (xen_enabled() && block->host == NULL) {
1795 /* We need to check if the requested address is in the RAM
1796 * because we don't want to map the entire memory in QEMU.
1797 * In that case just map until the end of the page.
1799 if (block->offset == 0) {
1800 ptr = xen_map_cache(addr, 0, 0);
1801 goto unlock;
1804 block->host = xen_map_cache(block->offset, block->max_length, 1);
1806 ptr = ramblock_ptr(block, addr - block->offset);
1808 unlock:
1809 rcu_read_unlock();
1810 return ptr;
1813 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1814 * but takes a size argument.
1816 * By the time this function returns, the returned pointer is not protected
1817 * by RCU anymore. If the caller is not within an RCU critical section and
1818 * does not hold the iothread lock, it must have other means of protecting the
1819 * pointer, such as a reference to the region that includes the incoming
1820 * ram_addr_t.
1822 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1824 void *ptr;
1825 if (*size == 0) {
1826 return NULL;
1828 if (xen_enabled()) {
1829 return xen_map_cache(addr, *size, 1);
1830 } else {
1831 RAMBlock *block;
1832 rcu_read_lock();
1833 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1834 if (addr - block->offset < block->max_length) {
1835 if (addr - block->offset + *size > block->max_length)
1836 *size = block->max_length - addr + block->offset;
1837 ptr = ramblock_ptr(block, addr - block->offset);
1838 rcu_read_unlock();
1839 return ptr;
1843 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1844 abort();
1848 /* Some of the softmmu routines need to translate from a host pointer
1849 * (typically a TLB entry) back to a ram offset.
1851 * By the time this function returns, the returned pointer is not protected
1852 * by RCU anymore. If the caller is not within an RCU critical section and
1853 * does not hold the iothread lock, it must have other means of protecting the
1854 * pointer, such as a reference to the region that includes the incoming
1855 * ram_addr_t.
1857 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1859 RAMBlock *block;
1860 uint8_t *host = ptr;
1861 MemoryRegion *mr;
1863 if (xen_enabled()) {
1864 rcu_read_lock();
1865 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1866 mr = qemu_get_ram_block(*ram_addr)->mr;
1867 rcu_read_unlock();
1868 return mr;
1871 rcu_read_lock();
1872 block = atomic_rcu_read(&ram_list.mru_block);
1873 if (block && block->host && host - block->host < block->max_length) {
1874 goto found;
1877 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1878 /* This case append when the block is not mapped. */
1879 if (block->host == NULL) {
1880 continue;
1882 if (host - block->host < block->max_length) {
1883 goto found;
1887 rcu_read_unlock();
1888 return NULL;
1890 found:
1891 *ram_addr = block->offset + (host - block->host);
1892 mr = block->mr;
1893 rcu_read_unlock();
1894 return mr;
1897 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1898 uint64_t val, unsigned size)
1900 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1901 tb_invalidate_phys_page_fast(ram_addr, size);
1903 switch (size) {
1904 case 1:
1905 stb_p(qemu_get_ram_ptr(ram_addr), val);
1906 break;
1907 case 2:
1908 stw_p(qemu_get_ram_ptr(ram_addr), val);
1909 break;
1910 case 4:
1911 stl_p(qemu_get_ram_ptr(ram_addr), val);
1912 break;
1913 default:
1914 abort();
1916 /* Set both VGA and migration bits for simplicity and to remove
1917 * the notdirty callback faster.
1919 cpu_physical_memory_set_dirty_range(ram_addr, size,
1920 DIRTY_CLIENTS_NOCODE);
1921 /* we remove the notdirty callback only if the code has been
1922 flushed */
1923 if (!cpu_physical_memory_is_clean(ram_addr)) {
1924 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
1928 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1929 unsigned size, bool is_write)
1931 return is_write;
1934 static const MemoryRegionOps notdirty_mem_ops = {
1935 .write = notdirty_mem_write,
1936 .valid.accepts = notdirty_mem_accepts,
1937 .endianness = DEVICE_NATIVE_ENDIAN,
1940 /* Generate a debug exception if a watchpoint has been hit. */
1941 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1943 CPUState *cpu = current_cpu;
1944 CPUArchState *env = cpu->env_ptr;
1945 target_ulong pc, cs_base;
1946 target_ulong vaddr;
1947 CPUWatchpoint *wp;
1948 int cpu_flags;
1950 if (cpu->watchpoint_hit) {
1951 /* We re-entered the check after replacing the TB. Now raise
1952 * the debug interrupt so that is will trigger after the
1953 * current instruction. */
1954 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1955 return;
1957 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1958 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1959 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1960 && (wp->flags & flags)) {
1961 if (flags == BP_MEM_READ) {
1962 wp->flags |= BP_WATCHPOINT_HIT_READ;
1963 } else {
1964 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1966 wp->hitaddr = vaddr;
1967 wp->hitattrs = attrs;
1968 if (!cpu->watchpoint_hit) {
1969 cpu->watchpoint_hit = wp;
1970 tb_check_watchpoint(cpu);
1971 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1972 cpu->exception_index = EXCP_DEBUG;
1973 cpu_loop_exit(cpu);
1974 } else {
1975 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1976 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1977 cpu_resume_from_signal(cpu, NULL);
1980 } else {
1981 wp->flags &= ~BP_WATCHPOINT_HIT;
1986 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1987 so these check for a hit then pass through to the normal out-of-line
1988 phys routines. */
1989 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
1990 unsigned size, MemTxAttrs attrs)
1992 MemTxResult res;
1993 uint64_t data;
1995 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
1996 switch (size) {
1997 case 1:
1998 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
1999 break;
2000 case 2:
2001 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2002 break;
2003 case 4:
2004 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2005 break;
2006 default: abort();
2008 *pdata = data;
2009 return res;
2012 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2013 uint64_t val, unsigned size,
2014 MemTxAttrs attrs)
2016 MemTxResult res;
2018 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2019 switch (size) {
2020 case 1:
2021 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2022 break;
2023 case 2:
2024 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2025 break;
2026 case 4:
2027 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2028 break;
2029 default: abort();
2031 return res;
2034 static const MemoryRegionOps watch_mem_ops = {
2035 .read_with_attrs = watch_mem_read,
2036 .write_with_attrs = watch_mem_write,
2037 .endianness = DEVICE_NATIVE_ENDIAN,
2040 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2041 unsigned len, MemTxAttrs attrs)
2043 subpage_t *subpage = opaque;
2044 uint8_t buf[8];
2045 MemTxResult res;
2047 #if defined(DEBUG_SUBPAGE)
2048 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2049 subpage, len, addr);
2050 #endif
2051 res = address_space_read(subpage->as, addr + subpage->base,
2052 attrs, buf, len);
2053 if (res) {
2054 return res;
2056 switch (len) {
2057 case 1:
2058 *data = ldub_p(buf);
2059 return MEMTX_OK;
2060 case 2:
2061 *data = lduw_p(buf);
2062 return MEMTX_OK;
2063 case 4:
2064 *data = ldl_p(buf);
2065 return MEMTX_OK;
2066 case 8:
2067 *data = ldq_p(buf);
2068 return MEMTX_OK;
2069 default:
2070 abort();
2074 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2075 uint64_t value, unsigned len, MemTxAttrs attrs)
2077 subpage_t *subpage = opaque;
2078 uint8_t buf[8];
2080 #if defined(DEBUG_SUBPAGE)
2081 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2082 " value %"PRIx64"\n",
2083 __func__, subpage, len, addr, value);
2084 #endif
2085 switch (len) {
2086 case 1:
2087 stb_p(buf, value);
2088 break;
2089 case 2:
2090 stw_p(buf, value);
2091 break;
2092 case 4:
2093 stl_p(buf, value);
2094 break;
2095 case 8:
2096 stq_p(buf, value);
2097 break;
2098 default:
2099 abort();
2101 return address_space_write(subpage->as, addr + subpage->base,
2102 attrs, buf, len);
2105 static bool subpage_accepts(void *opaque, hwaddr addr,
2106 unsigned len, bool is_write)
2108 subpage_t *subpage = opaque;
2109 #if defined(DEBUG_SUBPAGE)
2110 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2111 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2112 #endif
2114 return address_space_access_valid(subpage->as, addr + subpage->base,
2115 len, is_write);
2118 static const MemoryRegionOps subpage_ops = {
2119 .read_with_attrs = subpage_read,
2120 .write_with_attrs = subpage_write,
2121 .impl.min_access_size = 1,
2122 .impl.max_access_size = 8,
2123 .valid.min_access_size = 1,
2124 .valid.max_access_size = 8,
2125 .valid.accepts = subpage_accepts,
2126 .endianness = DEVICE_NATIVE_ENDIAN,
2129 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2130 uint16_t section)
2132 int idx, eidx;
2134 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2135 return -1;
2136 idx = SUBPAGE_IDX(start);
2137 eidx = SUBPAGE_IDX(end);
2138 #if defined(DEBUG_SUBPAGE)
2139 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2140 __func__, mmio, start, end, idx, eidx, section);
2141 #endif
2142 for (; idx <= eidx; idx++) {
2143 mmio->sub_section[idx] = section;
2146 return 0;
2149 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2151 subpage_t *mmio;
2153 mmio = g_malloc0(sizeof(subpage_t));
2155 mmio->as = as;
2156 mmio->base = base;
2157 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2158 NULL, TARGET_PAGE_SIZE);
2159 mmio->iomem.subpage = true;
2160 #if defined(DEBUG_SUBPAGE)
2161 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2162 mmio, base, TARGET_PAGE_SIZE);
2163 #endif
2164 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2166 return mmio;
2169 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2170 MemoryRegion *mr)
2172 assert(as);
2173 MemoryRegionSection section = {
2174 .address_space = as,
2175 .mr = mr,
2176 .offset_within_address_space = 0,
2177 .offset_within_region = 0,
2178 .size = int128_2_64(),
2181 return phys_section_add(map, &section);
2184 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2186 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2187 MemoryRegionSection *sections = d->map.sections;
2189 return sections[index & ~TARGET_PAGE_MASK].mr;
2192 static void io_mem_init(void)
2194 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2195 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2196 NULL, UINT64_MAX);
2197 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2198 NULL, UINT64_MAX);
2199 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2200 NULL, UINT64_MAX);
2203 static void mem_begin(MemoryListener *listener)
2205 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2206 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2207 uint16_t n;
2209 n = dummy_section(&d->map, as, &io_mem_unassigned);
2210 assert(n == PHYS_SECTION_UNASSIGNED);
2211 n = dummy_section(&d->map, as, &io_mem_notdirty);
2212 assert(n == PHYS_SECTION_NOTDIRTY);
2213 n = dummy_section(&d->map, as, &io_mem_rom);
2214 assert(n == PHYS_SECTION_ROM);
2215 n = dummy_section(&d->map, as, &io_mem_watch);
2216 assert(n == PHYS_SECTION_WATCH);
2218 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2219 d->as = as;
2220 as->next_dispatch = d;
2223 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2225 phys_sections_free(&d->map);
2226 g_free(d);
2229 static void mem_commit(MemoryListener *listener)
2231 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2232 AddressSpaceDispatch *cur = as->dispatch;
2233 AddressSpaceDispatch *next = as->next_dispatch;
2235 phys_page_compact_all(next, next->map.nodes_nb);
2237 atomic_rcu_set(&as->dispatch, next);
2238 if (cur) {
2239 call_rcu(cur, address_space_dispatch_free, rcu);
2243 static void tcg_commit(MemoryListener *listener)
2245 CPUState *cpu;
2247 /* since each CPU stores ram addresses in its TLB cache, we must
2248 reset the modified entries */
2249 /* XXX: slow ! */
2250 CPU_FOREACH(cpu) {
2251 /* FIXME: Disentangle the cpu.h circular files deps so we can
2252 directly get the right CPU from listener. */
2253 if (cpu->tcg_as_listener != listener) {
2254 continue;
2256 cpu_reload_memory_map(cpu);
2260 void address_space_init_dispatch(AddressSpace *as)
2262 as->dispatch = NULL;
2263 as->dispatch_listener = (MemoryListener) {
2264 .begin = mem_begin,
2265 .commit = mem_commit,
2266 .region_add = mem_add,
2267 .region_nop = mem_add,
2268 .priority = 0,
2270 memory_listener_register(&as->dispatch_listener, as);
2273 void address_space_unregister(AddressSpace *as)
2275 memory_listener_unregister(&as->dispatch_listener);
2278 void address_space_destroy_dispatch(AddressSpace *as)
2280 AddressSpaceDispatch *d = as->dispatch;
2282 atomic_rcu_set(&as->dispatch, NULL);
2283 if (d) {
2284 call_rcu(d, address_space_dispatch_free, rcu);
2288 static void memory_map_init(void)
2290 system_memory = g_malloc(sizeof(*system_memory));
2292 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2293 address_space_init(&address_space_memory, system_memory, "memory");
2295 system_io = g_malloc(sizeof(*system_io));
2296 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2297 65536);
2298 address_space_init(&address_space_io, system_io, "I/O");
2301 MemoryRegion *get_system_memory(void)
2303 return system_memory;
2306 MemoryRegion *get_system_io(void)
2308 return system_io;
2311 #endif /* !defined(CONFIG_USER_ONLY) */
2313 /* physical memory access (slow version, mainly for debug) */
2314 #if defined(CONFIG_USER_ONLY)
2315 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2316 uint8_t *buf, int len, int is_write)
2318 int l, flags;
2319 target_ulong page;
2320 void * p;
2322 while (len > 0) {
2323 page = addr & TARGET_PAGE_MASK;
2324 l = (page + TARGET_PAGE_SIZE) - addr;
2325 if (l > len)
2326 l = len;
2327 flags = page_get_flags(page);
2328 if (!(flags & PAGE_VALID))
2329 return -1;
2330 if (is_write) {
2331 if (!(flags & PAGE_WRITE))
2332 return -1;
2333 /* XXX: this code should not depend on lock_user */
2334 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2335 return -1;
2336 memcpy(p, buf, l);
2337 unlock_user(p, addr, l);
2338 } else {
2339 if (!(flags & PAGE_READ))
2340 return -1;
2341 /* XXX: this code should not depend on lock_user */
2342 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2343 return -1;
2344 memcpy(buf, p, l);
2345 unlock_user(p, addr, 0);
2347 len -= l;
2348 buf += l;
2349 addr += l;
2351 return 0;
2354 #else
2356 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2357 hwaddr length)
2359 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2360 /* No early return if dirty_log_mask is or becomes 0, because
2361 * cpu_physical_memory_set_dirty_range will still call
2362 * xen_modified_memory.
2364 if (dirty_log_mask) {
2365 dirty_log_mask =
2366 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2368 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2369 tb_invalidate_phys_range(addr, addr + length);
2370 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2372 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2375 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2377 unsigned access_size_max = mr->ops->valid.max_access_size;
2379 /* Regions are assumed to support 1-4 byte accesses unless
2380 otherwise specified. */
2381 if (access_size_max == 0) {
2382 access_size_max = 4;
2385 /* Bound the maximum access by the alignment of the address. */
2386 if (!mr->ops->impl.unaligned) {
2387 unsigned align_size_max = addr & -addr;
2388 if (align_size_max != 0 && align_size_max < access_size_max) {
2389 access_size_max = align_size_max;
2393 /* Don't attempt accesses larger than the maximum. */
2394 if (l > access_size_max) {
2395 l = access_size_max;
2397 l = pow2floor(l);
2399 return l;
2402 static bool prepare_mmio_access(MemoryRegion *mr)
2404 bool unlocked = !qemu_mutex_iothread_locked();
2405 bool release_lock = false;
2407 if (unlocked && mr->global_locking) {
2408 qemu_mutex_lock_iothread();
2409 unlocked = false;
2410 release_lock = true;
2412 if (mr->flush_coalesced_mmio) {
2413 if (unlocked) {
2414 qemu_mutex_lock_iothread();
2416 qemu_flush_coalesced_mmio_buffer();
2417 if (unlocked) {
2418 qemu_mutex_unlock_iothread();
2422 return release_lock;
2425 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2426 uint8_t *buf, int len, bool is_write)
2428 hwaddr l;
2429 uint8_t *ptr;
2430 uint64_t val;
2431 hwaddr addr1;
2432 MemoryRegion *mr;
2433 MemTxResult result = MEMTX_OK;
2434 bool release_lock = false;
2436 rcu_read_lock();
2437 while (len > 0) {
2438 l = len;
2439 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2441 if (is_write) {
2442 if (!memory_access_is_direct(mr, is_write)) {
2443 release_lock |= prepare_mmio_access(mr);
2444 l = memory_access_size(mr, l, addr1);
2445 /* XXX: could force current_cpu to NULL to avoid
2446 potential bugs */
2447 switch (l) {
2448 case 8:
2449 /* 64 bit write access */
2450 val = ldq_p(buf);
2451 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2452 attrs);
2453 break;
2454 case 4:
2455 /* 32 bit write access */
2456 val = ldl_p(buf);
2457 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2458 attrs);
2459 break;
2460 case 2:
2461 /* 16 bit write access */
2462 val = lduw_p(buf);
2463 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2464 attrs);
2465 break;
2466 case 1:
2467 /* 8 bit write access */
2468 val = ldub_p(buf);
2469 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2470 attrs);
2471 break;
2472 default:
2473 abort();
2475 } else {
2476 addr1 += memory_region_get_ram_addr(mr);
2477 /* RAM case */
2478 ptr = qemu_get_ram_ptr(addr1);
2479 memcpy(ptr, buf, l);
2480 invalidate_and_set_dirty(mr, addr1, l);
2482 } else {
2483 if (!memory_access_is_direct(mr, is_write)) {
2484 /* I/O case */
2485 release_lock |= prepare_mmio_access(mr);
2486 l = memory_access_size(mr, l, addr1);
2487 switch (l) {
2488 case 8:
2489 /* 64 bit read access */
2490 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2491 attrs);
2492 stq_p(buf, val);
2493 break;
2494 case 4:
2495 /* 32 bit read access */
2496 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2497 attrs);
2498 stl_p(buf, val);
2499 break;
2500 case 2:
2501 /* 16 bit read access */
2502 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2503 attrs);
2504 stw_p(buf, val);
2505 break;
2506 case 1:
2507 /* 8 bit read access */
2508 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2509 attrs);
2510 stb_p(buf, val);
2511 break;
2512 default:
2513 abort();
2515 } else {
2516 /* RAM case */
2517 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2518 memcpy(buf, ptr, l);
2522 if (release_lock) {
2523 qemu_mutex_unlock_iothread();
2524 release_lock = false;
2527 len -= l;
2528 buf += l;
2529 addr += l;
2531 rcu_read_unlock();
2533 return result;
2536 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2537 const uint8_t *buf, int len)
2539 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2542 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2543 uint8_t *buf, int len)
2545 return address_space_rw(as, addr, attrs, buf, len, false);
2549 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2550 int len, int is_write)
2552 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2553 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into the RAM/ROM range */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2561 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2562 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2564 hwaddr l;
2565 uint8_t *ptr;
2566 hwaddr addr1;
2567 MemoryRegion *mr;
2569 rcu_read_lock();
2570 while (len > 0) {
2571 l = len;
2572 mr = address_space_translate(as, addr, &addr1, &l, true);
2574 if (!(memory_region_is_ram(mr) ||
2575 memory_region_is_romd(mr))) {
2576 l = memory_access_size(mr, l, addr1);
2577 } else {
2578 addr1 += memory_region_get_ram_addr(mr);
2579 /* ROM/RAM case */
2580 ptr = qemu_get_ram_ptr(addr1);
2581 switch (type) {
2582 case WRITE_DATA:
2583 memcpy(ptr, buf, l);
2584 invalidate_and_set_dirty(mr, addr1, l);
2585 break;
2586 case FLUSH_CACHE:
2587 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2588 break;
2591 len -= l;
2592 buf += l;
2593 addr += l;
2595 rcu_read_unlock();
2598 /* used for ROM loading : can write in RAM and ROM */
2599 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2600 const uint8_t *buf, int len)
2602 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2605 void cpu_flush_icache_range(hwaddr start, int len)
2608 * This function should do the same thing as an icache flush that was
2609 * triggered from within the guest. For TCG we are always cache coherent,
2610 * so there is no need to flush anything. For KVM / Xen we need to flush
2611 * the host's instruction cache at least.
2613 if (tcg_enabled()) {
2614 return;
2617 cpu_physical_memory_write_rom_internal(&address_space_memory,
2618 start, NULL, len, FLUSH_CACHE);
2621 typedef struct {
2622 MemoryRegion *mr;
2623 void *buffer;
2624 hwaddr addr;
2625 hwaddr len;
2626 bool in_use;
2627 } BounceBuffer;
2629 static BounceBuffer bounce;
2631 typedef struct MapClient {
2632 QEMUBH *bh;
2633 QLIST_ENTRY(MapClient) link;
2634 } MapClient;
2636 QemuMutex map_client_list_lock;
2637 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2638 = QLIST_HEAD_INITIALIZER(map_client_list);
2640 static void cpu_unregister_map_client_do(MapClient *client)
2642 QLIST_REMOVE(client, link);
2643 g_free(client);
2646 static void cpu_notify_map_clients_locked(void)
2648 MapClient *client;
2650 while (!QLIST_EMPTY(&map_client_list)) {
2651 client = QLIST_FIRST(&map_client_list);
2652 qemu_bh_schedule(client->bh);
2653 cpu_unregister_map_client_do(client);
2657 void cpu_register_map_client(QEMUBH *bh)
2659 MapClient *client = g_malloc(sizeof(*client));
2661 qemu_mutex_lock(&map_client_list_lock);
2662 client->bh = bh;
2663 QLIST_INSERT_HEAD(&map_client_list, client, link);
2664 if (!atomic_read(&bounce.in_use)) {
2665 cpu_notify_map_clients_locked();
2667 qemu_mutex_unlock(&map_client_list_lock);
2670 void cpu_exec_init_all(void)
2672 qemu_mutex_init(&ram_list.mutex);
2673 memory_map_init();
2674 io_mem_init();
2675 qemu_mutex_init(&map_client_list_lock);
2678 void cpu_unregister_map_client(QEMUBH *bh)
2680 MapClient *client;
2682 qemu_mutex_lock(&map_client_list_lock);
2683 QLIST_FOREACH(client, &map_client_list, link) {
2684 if (client->bh == bh) {
2685 cpu_unregister_map_client_do(client);
2686 break;
2689 qemu_mutex_unlock(&map_client_list_lock);
2692 static void cpu_notify_map_clients(void)
2694 qemu_mutex_lock(&map_client_list_lock);
2695 cpu_notify_map_clients_locked();
2696 qemu_mutex_unlock(&map_client_list_lock);
2699 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2701 MemoryRegion *mr;
2702 hwaddr l, xlat;
2704 rcu_read_lock();
2705 while (len > 0) {
2706 l = len;
2707 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2708 if (!memory_access_is_direct(mr, is_write)) {
2709 l = memory_access_size(mr, l, addr);
2710 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2711 return false;
2715 len -= l;
2716 addr += l;
2718 rcu_read_unlock();
2719 return true;
2722 /* Map a physical memory region into a host virtual address.
2723 * May map a subset of the requested range, given by and returned in *plen.
2724 * May return NULL if resources needed to perform the mapping are exhausted.
2725 * Use only for reads OR writes - not for read-modify-write operations.
2726 * Use cpu_register_map_client() to know when retrying the map operation is
2727 * likely to succeed.
2729 void *address_space_map(AddressSpace *as,
2730 hwaddr addr,
2731 hwaddr *plen,
2732 bool is_write)
2734 hwaddr len = *plen;
2735 hwaddr done = 0;
2736 hwaddr l, xlat, base;
2737 MemoryRegion *mr, *this_mr;
2738 ram_addr_t raddr;
2740 if (len == 0) {
2741 return NULL;
2744 l = len;
2745 rcu_read_lock();
2746 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2748 if (!memory_access_is_direct(mr, is_write)) {
2749 if (atomic_xchg(&bounce.in_use, true)) {
2750 rcu_read_unlock();
2751 return NULL;
2753 /* Avoid unbounded allocations */
2754 l = MIN(l, TARGET_PAGE_SIZE);
2755 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2756 bounce.addr = addr;
2757 bounce.len = l;
2759 memory_region_ref(mr);
2760 bounce.mr = mr;
2761 if (!is_write) {
2762 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2763 bounce.buffer, l);
2766 rcu_read_unlock();
2767 *plen = l;
2768 return bounce.buffer;
2771 base = xlat;
2772 raddr = memory_region_get_ram_addr(mr);
2774 for (;;) {
2775 len -= l;
2776 addr += l;
2777 done += l;
2778 if (len == 0) {
2779 break;
2782 l = len;
2783 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2784 if (this_mr != mr || xlat != base + done) {
2785 break;
2789 memory_region_ref(mr);
2790 rcu_read_unlock();
2791 *plen = done;
2792 return qemu_ram_ptr_length(raddr + base, plen);
2795 /* Unmaps a memory region previously mapped by address_space_map().
2796 * Will also mark the memory as dirty if is_write == 1. access_len gives
2797 * the amount of memory that was actually read or written by the caller.
2799 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2800 int is_write, hwaddr access_len)
2802 if (buffer != bounce.buffer) {
2803 MemoryRegion *mr;
2804 ram_addr_t addr1;
2806 mr = qemu_ram_addr_from_host(buffer, &addr1);
2807 assert(mr != NULL);
2808 if (is_write) {
2809 invalidate_and_set_dirty(mr, addr1, access_len);
2811 if (xen_enabled()) {
2812 xen_invalidate_map_cache_entry(buffer);
2814 memory_region_unref(mr);
2815 return;
2817 if (is_write) {
2818 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2819 bounce.buffer, access_len);
2821 qemu_vfree(bounce.buffer);
2822 bounce.buffer = NULL;
2823 memory_region_unref(bounce.mr);
2824 atomic_mb_set(&bounce.in_use, false);
2825 cpu_notify_map_clients();
2828 void *cpu_physical_memory_map(hwaddr addr,
2829 hwaddr *plen,
2830 int is_write)
2832 return address_space_map(&address_space_memory, addr, plen, is_write);
2835 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2836 int is_write, hwaddr access_len)
2838 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2841 /* warning: addr must be aligned */
2842 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2843 MemTxAttrs attrs,
2844 MemTxResult *result,
2845 enum device_endian endian)
2847 uint8_t *ptr;
2848 uint64_t val;
2849 MemoryRegion *mr;
2850 hwaddr l = 4;
2851 hwaddr addr1;
2852 MemTxResult r;
2853 bool release_lock = false;
2855 rcu_read_lock();
2856 mr = address_space_translate(as, addr, &addr1, &l, false);
2857 if (l < 4 || !memory_access_is_direct(mr, false)) {
2858 release_lock |= prepare_mmio_access(mr);
2860 /* I/O case */
2861 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2862 #if defined(TARGET_WORDS_BIGENDIAN)
2863 if (endian == DEVICE_LITTLE_ENDIAN) {
2864 val = bswap32(val);
2866 #else
2867 if (endian == DEVICE_BIG_ENDIAN) {
2868 val = bswap32(val);
2870 #endif
2871 } else {
2872 /* RAM case */
2873 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2874 & TARGET_PAGE_MASK)
2875 + addr1);
2876 switch (endian) {
2877 case DEVICE_LITTLE_ENDIAN:
2878 val = ldl_le_p(ptr);
2879 break;
2880 case DEVICE_BIG_ENDIAN:
2881 val = ldl_be_p(ptr);
2882 break;
2883 default:
2884 val = ldl_p(ptr);
2885 break;
2887 r = MEMTX_OK;
2889 if (result) {
2890 *result = r;
2892 if (release_lock) {
2893 qemu_mutex_unlock_iothread();
2895 rcu_read_unlock();
2896 return val;
2899 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2900 MemTxAttrs attrs, MemTxResult *result)
2902 return address_space_ldl_internal(as, addr, attrs, result,
2903 DEVICE_NATIVE_ENDIAN);
2906 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2907 MemTxAttrs attrs, MemTxResult *result)
2909 return address_space_ldl_internal(as, addr, attrs, result,
2910 DEVICE_LITTLE_ENDIAN);
2913 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2914 MemTxAttrs attrs, MemTxResult *result)
2916 return address_space_ldl_internal(as, addr, attrs, result,
2917 DEVICE_BIG_ENDIAN);
2920 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2922 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2925 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2927 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2930 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2932 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2935 /* warning: addr must be aligned */
2936 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2937 MemTxAttrs attrs,
2938 MemTxResult *result,
2939 enum device_endian endian)
2941 uint8_t *ptr;
2942 uint64_t val;
2943 MemoryRegion *mr;
2944 hwaddr l = 8;
2945 hwaddr addr1;
2946 MemTxResult r;
2947 bool release_lock = false;
2949 rcu_read_lock();
2950 mr = address_space_translate(as, addr, &addr1, &l,
2951 false);
2952 if (l < 8 || !memory_access_is_direct(mr, false)) {
2953 release_lock |= prepare_mmio_access(mr);
2955 /* I/O case */
2956 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2957 #if defined(TARGET_WORDS_BIGENDIAN)
2958 if (endian == DEVICE_LITTLE_ENDIAN) {
2959 val = bswap64(val);
2961 #else
2962 if (endian == DEVICE_BIG_ENDIAN) {
2963 val = bswap64(val);
2965 #endif
2966 } else {
2967 /* RAM case */
2968 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2969 & TARGET_PAGE_MASK)
2970 + addr1);
2971 switch (endian) {
2972 case DEVICE_LITTLE_ENDIAN:
2973 val = ldq_le_p(ptr);
2974 break;
2975 case DEVICE_BIG_ENDIAN:
2976 val = ldq_be_p(ptr);
2977 break;
2978 default:
2979 val = ldq_p(ptr);
2980 break;
2982 r = MEMTX_OK;
2984 if (result) {
2985 *result = r;
2987 if (release_lock) {
2988 qemu_mutex_unlock_iothread();
2990 rcu_read_unlock();
2991 return val;
2994 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
2995 MemTxAttrs attrs, MemTxResult *result)
2997 return address_space_ldq_internal(as, addr, attrs, result,
2998 DEVICE_NATIVE_ENDIAN);
3001 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3002 MemTxAttrs attrs, MemTxResult *result)
3004 return address_space_ldq_internal(as, addr, attrs, result,
3005 DEVICE_LITTLE_ENDIAN);
3008 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3009 MemTxAttrs attrs, MemTxResult *result)
3011 return address_space_ldq_internal(as, addr, attrs, result,
3012 DEVICE_BIG_ENDIAN);
3015 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3017 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3020 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3022 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3025 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3027 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3030 /* XXX: optimize */
3031 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3032 MemTxAttrs attrs, MemTxResult *result)
3034 uint8_t val;
3035 MemTxResult r;
3037 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3038 if (result) {
3039 *result = r;
3041 return val;
3044 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3046 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3049 /* warning: addr must be aligned */
3050 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3051 hwaddr addr,
3052 MemTxAttrs attrs,
3053 MemTxResult *result,
3054 enum device_endian endian)
3056 uint8_t *ptr;
3057 uint64_t val;
3058 MemoryRegion *mr;
3059 hwaddr l = 2;
3060 hwaddr addr1;
3061 MemTxResult r;
3062 bool release_lock = false;
3064 rcu_read_lock();
3065 mr = address_space_translate(as, addr, &addr1, &l,
3066 false);
3067 if (l < 2 || !memory_access_is_direct(mr, false)) {
3068 release_lock |= prepare_mmio_access(mr);
3070 /* I/O case */
3071 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3072 #if defined(TARGET_WORDS_BIGENDIAN)
3073 if (endian == DEVICE_LITTLE_ENDIAN) {
3074 val = bswap16(val);
3076 #else
3077 if (endian == DEVICE_BIG_ENDIAN) {
3078 val = bswap16(val);
3080 #endif
3081 } else {
3082 /* RAM case */
3083 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3084 & TARGET_PAGE_MASK)
3085 + addr1);
3086 switch (endian) {
3087 case DEVICE_LITTLE_ENDIAN:
3088 val = lduw_le_p(ptr);
3089 break;
3090 case DEVICE_BIG_ENDIAN:
3091 val = lduw_be_p(ptr);
3092 break;
3093 default:
3094 val = lduw_p(ptr);
3095 break;
3097 r = MEMTX_OK;
3099 if (result) {
3100 *result = r;
3102 if (release_lock) {
3103 qemu_mutex_unlock_iothread();
3105 rcu_read_unlock();
3106 return val;
3109 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3110 MemTxAttrs attrs, MemTxResult *result)
3112 return address_space_lduw_internal(as, addr, attrs, result,
3113 DEVICE_NATIVE_ENDIAN);
3116 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3117 MemTxAttrs attrs, MemTxResult *result)
3119 return address_space_lduw_internal(as, addr, attrs, result,
3120 DEVICE_LITTLE_ENDIAN);
3123 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3124 MemTxAttrs attrs, MemTxResult *result)
3126 return address_space_lduw_internal(as, addr, attrs, result,
3127 DEVICE_BIG_ENDIAN);
3130 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3132 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3135 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3137 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3140 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3142 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3145 /* warning: addr must be aligned. The ram page is not masked as dirty
3146 and the code inside is not invalidated. It is useful if the dirty
3147 bits are used to track modified PTEs */
3148 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3149 MemTxAttrs attrs, MemTxResult *result)
3151 uint8_t *ptr;
3152 MemoryRegion *mr;
3153 hwaddr l = 4;
3154 hwaddr addr1;
3155 MemTxResult r;
3156 uint8_t dirty_log_mask;
3157 bool release_lock = false;
3159 rcu_read_lock();
3160 mr = address_space_translate(as, addr, &addr1, &l,
3161 true);
3162 if (l < 4 || !memory_access_is_direct(mr, true)) {
3163 release_lock |= prepare_mmio_access(mr);
3165 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3166 } else {
3167 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3168 ptr = qemu_get_ram_ptr(addr1);
3169 stl_p(ptr, val);
3171 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3172 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3173 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3174 r = MEMTX_OK;
3176 if (result) {
3177 *result = r;
3179 if (release_lock) {
3180 qemu_mutex_unlock_iothread();
3182 rcu_read_unlock();
3185 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3187 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3190 /* warning: addr must be aligned */
3191 static inline void address_space_stl_internal(AddressSpace *as,
3192 hwaddr addr, uint32_t val,
3193 MemTxAttrs attrs,
3194 MemTxResult *result,
3195 enum device_endian endian)
3197 uint8_t *ptr;
3198 MemoryRegion *mr;
3199 hwaddr l = 4;
3200 hwaddr addr1;
3201 MemTxResult r;
3202 bool release_lock = false;
3204 rcu_read_lock();
3205 mr = address_space_translate(as, addr, &addr1, &l,
3206 true);
3207 if (l < 4 || !memory_access_is_direct(mr, true)) {
3208 release_lock |= prepare_mmio_access(mr);
3210 #if defined(TARGET_WORDS_BIGENDIAN)
3211 if (endian == DEVICE_LITTLE_ENDIAN) {
3212 val = bswap32(val);
3214 #else
3215 if (endian == DEVICE_BIG_ENDIAN) {
3216 val = bswap32(val);
3218 #endif
3219 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3220 } else {
3221 /* RAM case */
3222 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3223 ptr = qemu_get_ram_ptr(addr1);
3224 switch (endian) {
3225 case DEVICE_LITTLE_ENDIAN:
3226 stl_le_p(ptr, val);
3227 break;
3228 case DEVICE_BIG_ENDIAN:
3229 stl_be_p(ptr, val);
3230 break;
3231 default:
3232 stl_p(ptr, val);
3233 break;
3235 invalidate_and_set_dirty(mr, addr1, 4);
3236 r = MEMTX_OK;
3238 if (result) {
3239 *result = r;
3241 if (release_lock) {
3242 qemu_mutex_unlock_iothread();
3244 rcu_read_unlock();
3247 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3248 MemTxAttrs attrs, MemTxResult *result)
3250 address_space_stl_internal(as, addr, val, attrs, result,
3251 DEVICE_NATIVE_ENDIAN);
3254 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3255 MemTxAttrs attrs, MemTxResult *result)
3257 address_space_stl_internal(as, addr, val, attrs, result,
3258 DEVICE_LITTLE_ENDIAN);
3261 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3262 MemTxAttrs attrs, MemTxResult *result)
3264 address_space_stl_internal(as, addr, val, attrs, result,
3265 DEVICE_BIG_ENDIAN);
3268 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3270 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3273 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3275 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3278 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3280 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3283 /* XXX: optimize */
3284 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3285 MemTxAttrs attrs, MemTxResult *result)
3287 uint8_t v = val;
3288 MemTxResult r;
3290 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3291 if (result) {
3292 *result = r;
3296 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3298 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3301 /* warning: addr must be aligned */
3302 static inline void address_space_stw_internal(AddressSpace *as,
3303 hwaddr addr, uint32_t val,
3304 MemTxAttrs attrs,
3305 MemTxResult *result,
3306 enum device_endian endian)
3308 uint8_t *ptr;
3309 MemoryRegion *mr;
3310 hwaddr l = 2;
3311 hwaddr addr1;
3312 MemTxResult r;
3313 bool release_lock = false;
3315 rcu_read_lock();
3316 mr = address_space_translate(as, addr, &addr1, &l, true);
3317 if (l < 2 || !memory_access_is_direct(mr, true)) {
3318 release_lock |= prepare_mmio_access(mr);
3320 #if defined(TARGET_WORDS_BIGENDIAN)
3321 if (endian == DEVICE_LITTLE_ENDIAN) {
3322 val = bswap16(val);
3324 #else
3325 if (endian == DEVICE_BIG_ENDIAN) {
3326 val = bswap16(val);
3328 #endif
3329 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3330 } else {
3331 /* RAM case */
3332 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3333 ptr = qemu_get_ram_ptr(addr1);
3334 switch (endian) {
3335 case DEVICE_LITTLE_ENDIAN:
3336 stw_le_p(ptr, val);
3337 break;
3338 case DEVICE_BIG_ENDIAN:
3339 stw_be_p(ptr, val);
3340 break;
3341 default:
3342 stw_p(ptr, val);
3343 break;
3345 invalidate_and_set_dirty(mr, addr1, 2);
3346 r = MEMTX_OK;
3348 if (result) {
3349 *result = r;
3351 if (release_lock) {
3352 qemu_mutex_unlock_iothread();
3354 rcu_read_unlock();
3357 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3358 MemTxAttrs attrs, MemTxResult *result)
3360 address_space_stw_internal(as, addr, val, attrs, result,
3361 DEVICE_NATIVE_ENDIAN);
3364 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3365 MemTxAttrs attrs, MemTxResult *result)
3367 address_space_stw_internal(as, addr, val, attrs, result,
3368 DEVICE_LITTLE_ENDIAN);
3371 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3372 MemTxAttrs attrs, MemTxResult *result)
3374 address_space_stw_internal(as, addr, val, attrs, result,
3375 DEVICE_BIG_ENDIAN);
3378 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3380 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3383 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3385 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3388 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3390 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3393 /* XXX: optimize */
3394 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3395 MemTxAttrs attrs, MemTxResult *result)
3397 MemTxResult r;
3398 val = tswap64(val);
3399 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3400 if (result) {
3401 *result = r;
3405 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3406 MemTxAttrs attrs, MemTxResult *result)
3408 MemTxResult r;
3409 val = cpu_to_le64(val);
3410 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3411 if (result) {
3412 *result = r;
3415 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3416 MemTxAttrs attrs, MemTxResult *result)
3418 MemTxResult r;
3419 val = cpu_to_be64(val);
3420 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3421 if (result) {
3422 *result = r;
3426 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3428 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3431 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3433 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3436 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3438 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3441 /* virtual memory access for debug (includes writing to ROM) */
3442 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3443 uint8_t *buf, int len, int is_write)
3445 int l;
3446 hwaddr phys_addr;
3447 target_ulong page;
3449 while (len > 0) {
3450 page = addr & TARGET_PAGE_MASK;
3451 phys_addr = cpu_get_phys_page_debug(cpu, page);
3452 /* if no physical page mapped, return an error */
3453 if (phys_addr == -1)
3454 return -1;
3455 l = (page + TARGET_PAGE_SIZE) - addr;
3456 if (l > len)
3457 l = len;
3458 phys_addr += (addr & ~TARGET_PAGE_MASK);
3459 if (is_write) {
3460 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3461 } else {
3462 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3463 buf, l, 0);
3465 len -= l;
3466 buf += l;
3467 addr += l;
3469 return 0;
3471 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3487 #ifndef CONFIG_USER_ONLY
3488 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3490 MemoryRegion*mr;
3491 hwaddr l = 1;
3492 bool res;
3494 rcu_read_lock();
3495 mr = address_space_translate(&address_space_memory,
3496 phys_addr, &phys_addr, &l, false);
3498 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3499 rcu_read_unlock();
3500 return res;
3503 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3505 RAMBlock *block;
3506 int ret = 0;
3508 rcu_read_lock();
3509 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3510 ret = func(block->idstr, block->host, block->offset,
3511 block->used_length, opaque);
3512 if (ret) {
3513 break;
3516 rcu_read_unlock();
3517 return ret;
3519 #endif