util: add linux-only memfd fallback
[qemu/ar7.git] / exec.c
blob 4505dc76e87c054cf2cff63c13dffe192a1c2fdd
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
57 #include "qemu/range.h"
58 #ifndef _WIN32
59 #include "qemu/mmap-alloc.h"
60 #endif
62 //#define DEBUG_SUBPAGE
64 #if !defined(CONFIG_USER_ONLY)
65 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
66 * are protected by the ramlist lock.
68 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
70 static MemoryRegion *system_memory;
71 static MemoryRegion *system_io;
73 AddressSpace address_space_io;
74 AddressSpace address_space_memory;
76 MemoryRegion io_mem_rom, io_mem_notdirty;
77 static MemoryRegion io_mem_unassigned;
79 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
80 #define RAM_PREALLOC (1 << 0)
82 /* RAM is mmap-ed with MAP_SHARED */
83 #define RAM_SHARED (1 << 1)
85 /* Only a portion of RAM (used_length) is actually used, and migrated.
86 * This used_length can change across reboots.
88 #define RAM_RESIZEABLE (1 << 2)
90 /* RAM is backed by an mmapped file.
92 #define RAM_FILE (1 << 3)
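/* For example, a block created by qemu_ram_alloc_from_file() with share=true
 * (see below) ends up with both RAM_SHARED and RAM_FILE set. */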
93 #endif
95 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
96 /* current CPU in the current thread. It is only valid inside
97 cpu_exec() */
98 __thread CPUState *current_cpu;
99 /* 0 = Do not count executed instructions.
100 1 = Precise instruction counting.
101 2 = Adaptive rate instruction counting. */
102 int use_icount;
104 #if !defined(CONFIG_USER_ONLY)
106 typedef struct PhysPageEntry PhysPageEntry;
108 struct PhysPageEntry {
109 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
110 uint32_t skip : 6;
111 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
112 uint32_t ptr : 26;
115 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
117 /* Size of the L2 (and L3, etc) page tables. */
118 #define ADDR_SPACE_BITS 64
120 #define P_L2_BITS 9
121 #define P_L2_SIZE (1 << P_L2_BITS)
123 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
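/* Illustrative example (not part of the original source): with 4 KiB target
 * pages (TARGET_PAGE_BITS == 12) this evaluates to (64 - 12 - 1) / 9 + 1 = 6
 * levels of 512-entry nodes, enough to cover the full 64-bit address space. */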
125 typedef PhysPageEntry Node[P_L2_SIZE];
127 typedef struct PhysPageMap {
128 struct rcu_head rcu;
130 unsigned sections_nb;
131 unsigned sections_nb_alloc;
132 unsigned nodes_nb;
133 unsigned nodes_nb_alloc;
134 Node *nodes;
135 MemoryRegionSection *sections;
136 } PhysPageMap;
138 struct AddressSpaceDispatch {
139 struct rcu_head rcu;
141 /* This is a multi-level map on the physical address space.
142 * The bottom level has pointers to MemoryRegionSections.
144 PhysPageEntry phys_map;
145 PhysPageMap map;
146 AddressSpace *as;
149 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
150 typedef struct subpage_t {
151 MemoryRegion iomem;
152 AddressSpace *as;
153 hwaddr base;
154 uint16_t sub_section[TARGET_PAGE_SIZE];
155 } subpage_t;
157 #define PHYS_SECTION_UNASSIGNED 0
158 #define PHYS_SECTION_NOTDIRTY 1
159 #define PHYS_SECTION_ROM 2
160 #define PHYS_SECTION_WATCH 3
162 static void io_mem_init(void);
163 static void memory_map_init(void);
164 static void tcg_commit(MemoryListener *listener);
166 static MemoryRegion io_mem_watch;
167 #endif
169 #if !defined(CONFIG_USER_ONLY)
171 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
173 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
174 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
175 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
176 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
180 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
182 unsigned i;
183 uint32_t ret;
184 PhysPageEntry e;
185 PhysPageEntry *p;
187 ret = map->nodes_nb++;
188 p = map->nodes[ret];
189 assert(ret != PHYS_MAP_NODE_NIL);
190 assert(ret != map->nodes_nb_alloc);
192 e.skip = leaf ? 0 : 1;
193 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
194 for (i = 0; i < P_L2_SIZE; ++i) {
195 memcpy(&p[i], &e, sizeof(e));
197 return ret;
200 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
201 hwaddr *index, hwaddr *nb, uint16_t leaf,
202 int level)
204 PhysPageEntry *p;
205 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
207 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
208 lp->ptr = phys_map_node_alloc(map, level == 0);
210 p = map->nodes[lp->ptr];
211 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
213 while (*nb && lp < &p[P_L2_SIZE]) {
214 if ((*index & (step - 1)) == 0 && *nb >= step) {
215 lp->skip = 0;
216 lp->ptr = leaf;
217 *index += step;
218 *nb -= step;
219 } else {
220 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
222 ++lp;
226 static void phys_page_set(AddressSpaceDispatch *d,
227 hwaddr index, hwaddr nb,
228 uint16_t leaf)
230 /* Wildly overreserve - it doesn't matter much. */
231 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
233 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
236 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
237 * and update our entry so we can skip it and go directly to the destination.
239 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
241 unsigned valid_ptr = P_L2_SIZE;
242 int valid = 0;
243 PhysPageEntry *p;
244 int i;
246 if (lp->ptr == PHYS_MAP_NODE_NIL) {
247 return;
250 p = nodes[lp->ptr];
251 for (i = 0; i < P_L2_SIZE; i++) {
252 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
253 continue;
256 valid_ptr = i;
257 valid++;
258 if (p[i].skip) {
259 phys_page_compact(&p[i], nodes, compacted);
263 /* We can only compress if there's only one child. */
264 if (valid != 1) {
265 return;
268 assert(valid_ptr < P_L2_SIZE);
270 /* Don't compress if it won't fit in the # of bits we have. */
271 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
272 return;
275 lp->ptr = p[valid_ptr].ptr;
276 if (!p[valid_ptr].skip) {
277 /* If our only child is a leaf, make this a leaf. */
278 /* By design, we should have made this node a leaf to begin with so we
279 * should never reach here.
280 * But since it's so simple to handle this, let's do it just in case we
281 * change this rule.
283 lp->skip = 0;
284 } else {
285 lp->skip += p[valid_ptr].skip;
289 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
291 DECLARE_BITMAP(compacted, nodes_nb);
293 if (d->phys_map.skip) {
294 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
298 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
299 Node *nodes, MemoryRegionSection *sections)
301 PhysPageEntry *p;
302 hwaddr index = addr >> TARGET_PAGE_BITS;
303 int i;
305 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
306 if (lp.ptr == PHYS_MAP_NODE_NIL) {
307 return &sections[PHYS_SECTION_UNASSIGNED];
309 p = nodes[lp.ptr];
310 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
313 if (sections[lp.ptr].size.hi ||
314 range_covers_byte(sections[lp.ptr].offset_within_address_space,
315 sections[lp.ptr].size.lo, addr)) {
316 return &sections[lp.ptr];
317 } else {
318 return &sections[PHYS_SECTION_UNASSIGNED];
322 bool memory_region_is_unassigned(MemoryRegion *mr)
324 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
325 && mr != &io_mem_watch;
328 /* Called from RCU critical section */
329 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
330 hwaddr addr,
331 bool resolve_subpage)
333 MemoryRegionSection *section;
334 subpage_t *subpage;
336 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
337 if (resolve_subpage && section->mr->subpage) {
338 subpage = container_of(section->mr, subpage_t, iomem);
339 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
341 return section;
344 /* Called from RCU critical section */
345 static MemoryRegionSection *
346 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
347 hwaddr *plen, bool resolve_subpage)
349 MemoryRegionSection *section;
350 MemoryRegion *mr;
351 Int128 diff;
353 section = address_space_lookup_region(d, addr, resolve_subpage);
354 /* Compute offset within MemoryRegionSection */
355 addr -= section->offset_within_address_space;
357 /* Compute offset within MemoryRegion */
358 *xlat = addr + section->offset_within_region;
360 mr = section->mr;
362 /* MMIO registers can be expected to perform full-width accesses based only
363 * on their address, without considering adjacent registers that could
364 * decode to completely different MemoryRegions. When such registers
365 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
366 * regions overlap wildly. For this reason we cannot clamp the accesses
367 * here.
369 * If the length is small (as is the case for address_space_ldl/stl),
370 * everything works fine. If the incoming length is large, however,
371 * the caller really has to do the clamping through memory_access_size.
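* (RAM, by contrast, is safe to clamp, which is what the int128_min()
* computation just below does.)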
373 if (memory_region_is_ram(mr)) {
374 diff = int128_sub(section->size, int128_make64(addr));
375 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
377 return section;
380 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
382 if (memory_region_is_ram(mr)) {
383 return !(is_write && mr->readonly);
385 if (memory_region_is_romd(mr)) {
386 return !is_write;
389 return false;
392 /* Called from RCU critical section */
393 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
394 hwaddr *xlat, hwaddr *plen,
395 bool is_write)
397 IOMMUTLBEntry iotlb;
398 MemoryRegionSection *section;
399 MemoryRegion *mr;
401 for (;;) {
402 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
403 section = address_space_translate_internal(d, addr, &addr, plen, true);
404 mr = section->mr;
406 if (!mr->iommu_ops) {
407 break;
410 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
411 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
412 | (addr & iotlb.addr_mask));
413 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
414 if (!(iotlb.perm & (1 << is_write))) {
415 mr = &io_mem_unassigned;
416 break;
419 as = iotlb.target_as;
422 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
423 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
424 *plen = MIN(page, *plen);
427 *xlat = addr;
428 return mr;
431 /* Called from RCU critical section */
432 MemoryRegionSection *
433 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
434 hwaddr *xlat, hwaddr *plen)
436 MemoryRegionSection *section;
437 section = address_space_translate_internal(cpu->memory_dispatch,
438 addr, xlat, plen, false);
440 assert(!section->mr->iommu_ops);
441 return section;
443 #endif
445 #if !defined(CONFIG_USER_ONLY)
447 static int cpu_common_post_load(void *opaque, int version_id)
449 CPUState *cpu = opaque;
451 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
452 version_id is increased. */
453 cpu->interrupt_request &= ~0x01;
454 tlb_flush(cpu, 1);
456 return 0;
459 static int cpu_common_pre_load(void *opaque)
461 CPUState *cpu = opaque;
463 cpu->exception_index = -1;
465 return 0;
468 static bool cpu_common_exception_index_needed(void *opaque)
470 CPUState *cpu = opaque;
472 return tcg_enabled() && cpu->exception_index != -1;
475 static const VMStateDescription vmstate_cpu_common_exception_index = {
476 .name = "cpu_common/exception_index",
477 .version_id = 1,
478 .minimum_version_id = 1,
479 .needed = cpu_common_exception_index_needed,
480 .fields = (VMStateField[]) {
481 VMSTATE_INT32(exception_index, CPUState),
482 VMSTATE_END_OF_LIST()
486 static bool cpu_common_crash_occurred_needed(void *opaque)
488 CPUState *cpu = opaque;
490 return cpu->crash_occurred;
493 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
494 .name = "cpu_common/crash_occurred",
495 .version_id = 1,
496 .minimum_version_id = 1,
497 .needed = cpu_common_crash_occurred_needed,
498 .fields = (VMStateField[]) {
499 VMSTATE_BOOL(crash_occurred, CPUState),
500 VMSTATE_END_OF_LIST()
504 const VMStateDescription vmstate_cpu_common = {
505 .name = "cpu_common",
506 .version_id = 1,
507 .minimum_version_id = 1,
508 .pre_load = cpu_common_pre_load,
509 .post_load = cpu_common_post_load,
510 .fields = (VMStateField[]) {
511 VMSTATE_UINT32(halted, CPUState),
512 VMSTATE_UINT32(interrupt_request, CPUState),
513 VMSTATE_END_OF_LIST()
515 .subsections = (const VMStateDescription*[]) {
516 &vmstate_cpu_common_exception_index,
517 &vmstate_cpu_common_crash_occurred,
518 NULL
522 #endif
524 CPUState *qemu_get_cpu(int index)
526 CPUState *cpu;
528 CPU_FOREACH(cpu) {
529 if (cpu->cpu_index == index) {
530 return cpu;
534 return NULL;
537 #if !defined(CONFIG_USER_ONLY)
538 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
540 /* We only support one address space per cpu at the moment. */
541 assert(cpu->as == as);
543 if (cpu->tcg_as_listener) {
544 memory_listener_unregister(cpu->tcg_as_listener);
545 } else {
546 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
548 cpu->tcg_as_listener->commit = tcg_commit;
549 memory_listener_register(cpu->tcg_as_listener, as);
551 #endif
553 #ifndef CONFIG_USER_ONLY
554 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
556 static int cpu_get_free_index(Error **errp)
558 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
560 if (cpu >= MAX_CPUMASK_BITS) {
561 error_setg(errp, "Trying to use more CPUs than max of %d",
562 MAX_CPUMASK_BITS);
563 return -1;
566 bitmap_set(cpu_index_map, cpu, 1);
567 return cpu;
570 void cpu_exec_exit(CPUState *cpu)
572 if (cpu->cpu_index == -1) {
573 /* cpu_index was never allocated by this @cpu or was already freed. */
574 return;
577 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
578 cpu->cpu_index = -1;
580 #else
582 static int cpu_get_free_index(Error **errp)
584 CPUState *some_cpu;
585 int cpu_index = 0;
587 CPU_FOREACH(some_cpu) {
588 cpu_index++;
590 return cpu_index;
593 void cpu_exec_exit(CPUState *cpu)
596 #endif
598 void cpu_exec_init(CPUState *cpu, Error **errp)
600 CPUClass *cc = CPU_GET_CLASS(cpu);
601 int cpu_index;
602 Error *local_err = NULL;
604 #ifndef CONFIG_USER_ONLY
605 cpu->as = &address_space_memory;
606 cpu->thread_id = qemu_get_thread_id();
607 cpu_reload_memory_map(cpu);
608 #endif
610 #if defined(CONFIG_USER_ONLY)
611 cpu_list_lock();
612 #endif
613 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
614 if (local_err) {
615 error_propagate(errp, local_err);
616 #if defined(CONFIG_USER_ONLY)
617 cpu_list_unlock();
618 #endif
619 return;
621 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
622 #if defined(CONFIG_USER_ONLY)
623 cpu_list_unlock();
624 #endif
625 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
626 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
628 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
629 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
630 cpu_save, cpu_load, cpu->env_ptr);
631 assert(cc->vmsd == NULL);
632 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
633 #endif
634 if (cc->vmsd != NULL) {
635 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
639 #if defined(CONFIG_USER_ONLY)
640 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
642 tb_invalidate_phys_page_range(pc, pc + 1, 0);
644 #else
645 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
647 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
648 if (phys != -1) {
649 tb_invalidate_phys_addr(cpu->as,
650 phys | (pc & ~TARGET_PAGE_MASK));
653 #endif
655 #if defined(CONFIG_USER_ONLY)
656 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
661 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
662 int flags)
664 return -ENOSYS;
667 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
671 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
672 int flags, CPUWatchpoint **watchpoint)
674 return -ENOSYS;
676 #else
677 /* Add a watchpoint. */
678 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
679 int flags, CPUWatchpoint **watchpoint)
681 CPUWatchpoint *wp;
683 /* forbid ranges which are empty or run off the end of the address space */
684 if (len == 0 || (addr + len - 1) < addr) {
685 error_report("tried to set invalid watchpoint at %"
686 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
687 return -EINVAL;
689 wp = g_malloc(sizeof(*wp));
691 wp->vaddr = addr;
692 wp->len = len;
693 wp->flags = flags;
695 /* keep all GDB-injected watchpoints in front */
696 if (flags & BP_GDB) {
697 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
698 } else {
699 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
702 tlb_flush_page(cpu, addr);
704 if (watchpoint)
705 *watchpoint = wp;
706 return 0;
709 /* Remove a specific watchpoint. */
710 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
711 int flags)
713 CPUWatchpoint *wp;
715 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
716 if (addr == wp->vaddr && len == wp->len
717 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
718 cpu_watchpoint_remove_by_ref(cpu, wp);
719 return 0;
722 return -ENOENT;
725 /* Remove a specific watchpoint by reference. */
726 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
728 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
730 tlb_flush_page(cpu, watchpoint->vaddr);
732 g_free(watchpoint);
735 /* Remove all matching watchpoints. */
736 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
738 CPUWatchpoint *wp, *next;
740 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
741 if (wp->flags & mask) {
742 cpu_watchpoint_remove_by_ref(cpu, wp);
747 /* Return true if this watchpoint address matches the specified
748 * access (ie the address range covered by the watchpoint overlaps
749 * partially or completely with the address range covered by the
750 * access).
752 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
753 vaddr addr,
754 vaddr len)
756 /* We know the lengths are non-zero, but a little caution is
757 * required to avoid errors in the case where the range ends
758 * exactly at the top of the address space and so addr + len
759 * wraps round to zero.
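* For example (illustrative values only): a watchpoint with
* vaddr == UINT64_MAX - 3 and len == 4 ends exactly at the top of the
* address space; vaddr + len would wrap to 0, whereas the "- 1" forms
* below stay at UINT64_MAX and the overlap test remains correct.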
761 vaddr wpend = wp->vaddr + wp->len - 1;
762 vaddr addrend = addr + len - 1;
764 return !(addr > wpend || wp->vaddr > addrend);
767 #endif
769 /* Add a breakpoint. */
770 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
771 CPUBreakpoint **breakpoint)
773 CPUBreakpoint *bp;
775 bp = g_malloc(sizeof(*bp));
777 bp->pc = pc;
778 bp->flags = flags;
780 /* keep all GDB-injected breakpoints in front */
781 if (flags & BP_GDB) {
782 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
783 } else {
784 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
787 breakpoint_invalidate(cpu, pc);
789 if (breakpoint) {
790 *breakpoint = bp;
792 return 0;
795 /* Remove a specific breakpoint. */
796 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
798 CPUBreakpoint *bp;
800 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
801 if (bp->pc == pc && bp->flags == flags) {
802 cpu_breakpoint_remove_by_ref(cpu, bp);
803 return 0;
806 return -ENOENT;
809 /* Remove a specific breakpoint by reference. */
810 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
812 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
814 breakpoint_invalidate(cpu, breakpoint->pc);
816 g_free(breakpoint);
819 /* Remove all matching breakpoints. */
820 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
822 CPUBreakpoint *bp, *next;
824 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
825 if (bp->flags & mask) {
826 cpu_breakpoint_remove_by_ref(cpu, bp);
831 /* enable or disable single step mode. EXCP_DEBUG is returned by the
832 CPU loop after each instruction */
833 void cpu_single_step(CPUState *cpu, int enabled)
835 if (cpu->singlestep_enabled != enabled) {
836 cpu->singlestep_enabled = enabled;
837 if (kvm_enabled()) {
838 kvm_update_guest_debug(cpu, 0);
839 } else {
840 /* must flush all the translated code to avoid inconsistencies */
841 /* XXX: only flush what is necessary */
842 tb_flush(cpu);
847 void cpu_abort(CPUState *cpu, const char *fmt, ...)
849 va_list ap;
850 va_list ap2;
852 va_start(ap, fmt);
853 va_copy(ap2, ap);
854 fprintf(stderr, "qemu: fatal: ");
855 vfprintf(stderr, fmt, ap);
856 fprintf(stderr, "\n");
857 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
858 if (qemu_log_enabled()) {
859 qemu_log("qemu: fatal: ");
860 qemu_log_vprintf(fmt, ap2);
861 qemu_log("\n");
862 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
863 qemu_log_flush();
864 qemu_log_close();
866 va_end(ap2);
867 va_end(ap);
868 #if defined(CONFIG_USER_ONLY)
870 struct sigaction act;
871 sigfillset(&act.sa_mask);
872 act.sa_handler = SIG_DFL;
873 sigaction(SIGABRT, &act, NULL);
875 #endif
876 abort();
879 #if !defined(CONFIG_USER_ONLY)
880 /* Called from RCU critical section */
881 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
883 RAMBlock *block;
885 block = atomic_rcu_read(&ram_list.mru_block);
886 if (block && addr - block->offset < block->max_length) {
887 goto found;
889 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
890 if (addr - block->offset < block->max_length) {
891 goto found;
895 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
896 abort();
898 found:
899 /* It is safe to write mru_block outside the iothread lock. This
900 * is what happens:
902 * mru_block = xxx
903 * rcu_read_unlock()
904 * xxx removed from list
905 * rcu_read_lock()
906 * read mru_block
907 * mru_block = NULL;
908 * call_rcu(reclaim_ramblock, xxx);
909 * rcu_read_unlock()
911 * atomic_rcu_set is not needed here. The block was already published
912 * when it was placed into the list. Here we're just making an extra
913 * copy of the pointer.
915 ram_list.mru_block = block;
916 return block;
919 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
921 CPUState *cpu;
922 ram_addr_t start1;
923 RAMBlock *block;
924 ram_addr_t end;
926 end = TARGET_PAGE_ALIGN(start + length);
927 start &= TARGET_PAGE_MASK;
929 rcu_read_lock();
930 block = qemu_get_ram_block(start);
931 assert(block == qemu_get_ram_block(end - 1));
932 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
933 CPU_FOREACH(cpu) {
934 tlb_reset_dirty(cpu, start1, length);
936 rcu_read_unlock();
939 /* Note: start and end must be within the same ram block. */
940 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
941 ram_addr_t length,
942 unsigned client)
944 unsigned long end, page;
945 bool dirty;
947 if (length == 0) {
948 return false;
951 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
952 page = start >> TARGET_PAGE_BITS;
953 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
954 page, end - page);
956 if (dirty && tcg_enabled()) {
957 tlb_reset_dirty_range_all(start, length);
960 return dirty;
963 /* Called from RCU critical section */
964 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
965 MemoryRegionSection *section,
966 target_ulong vaddr,
967 hwaddr paddr, hwaddr xlat,
968 int prot,
969 target_ulong *address)
971 hwaddr iotlb;
972 CPUWatchpoint *wp;
974 if (memory_region_is_ram(section->mr)) {
975 /* Normal RAM. */
976 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
977 + xlat;
978 if (!section->readonly) {
979 iotlb |= PHYS_SECTION_NOTDIRTY;
980 } else {
981 iotlb |= PHYS_SECTION_ROM;
983 } else {
984 AddressSpaceDispatch *d;
986 d = atomic_rcu_read(&section->address_space->dispatch);
987 iotlb = section - d->map.sections;
988 iotlb += xlat;
991 /* Make accesses to pages with watchpoints go via the
992 watchpoint trap routines. */
993 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
994 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
995 /* Avoid trapping reads of pages with a write breakpoint. */
996 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
997 iotlb = PHYS_SECTION_WATCH + paddr;
998 *address |= TLB_MMIO;
999 break;
1004 return iotlb;
1006 #endif /* defined(CONFIG_USER_ONLY) */
1008 #if !defined(CONFIG_USER_ONLY)
1010 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1011 uint16_t section);
1012 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1014 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1015 qemu_anon_ram_alloc;
1018 * Set a custom physical guest memory allocator.
1019 * Accelerators with unusual needs may need this. Hopefully, we can
1020 * get rid of it eventually.
1022 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1024 phys_mem_alloc = alloc;
1027 static uint16_t phys_section_add(PhysPageMap *map,
1028 MemoryRegionSection *section)
1030 /* The physical section number is ORed with a page-aligned
1031 * pointer to produce the iotlb entries. Thus it should
1032 * never overflow into the page-aligned value.
1034 assert(map->sections_nb < TARGET_PAGE_SIZE);
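/* Illustrative note (not from the original source): iotlb entries keep the
 * section index in the low TARGET_PAGE_BITS bits, which is why
 * iotlb_to_region() below masks with ~TARGET_PAGE_MASK and why the number
 * of sections must stay below TARGET_PAGE_SIZE. */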
1036 if (map->sections_nb == map->sections_nb_alloc) {
1037 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1038 map->sections = g_renew(MemoryRegionSection, map->sections,
1039 map->sections_nb_alloc);
1041 map->sections[map->sections_nb] = *section;
1042 memory_region_ref(section->mr);
1043 return map->sections_nb++;
1046 static void phys_section_destroy(MemoryRegion *mr)
1048 memory_region_unref(mr);
1050 if (mr->subpage) {
1051 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1052 object_unref(OBJECT(&subpage->iomem));
1053 g_free(subpage);
1057 static void phys_sections_free(PhysPageMap *map)
1059 while (map->sections_nb > 0) {
1060 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1061 phys_section_destroy(section->mr);
1063 g_free(map->sections);
1064 g_free(map->nodes);
1067 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1069 subpage_t *subpage;
1070 hwaddr base = section->offset_within_address_space
1071 & TARGET_PAGE_MASK;
1072 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1073 d->map.nodes, d->map.sections);
1074 MemoryRegionSection subsection = {
1075 .offset_within_address_space = base,
1076 .size = int128_make64(TARGET_PAGE_SIZE),
1078 hwaddr start, end;
1080 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1082 if (!(existing->mr->subpage)) {
1083 subpage = subpage_init(d->as, base);
1084 subsection.address_space = d->as;
1085 subsection.mr = &subpage->iomem;
1086 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1087 phys_section_add(&d->map, &subsection));
1088 } else {
1089 subpage = container_of(existing->mr, subpage_t, iomem);
1091 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1092 end = start + int128_get64(section->size) - 1;
1093 subpage_register(subpage, start, end,
1094 phys_section_add(&d->map, section));
1098 static void register_multipage(AddressSpaceDispatch *d,
1099 MemoryRegionSection *section)
1101 hwaddr start_addr = section->offset_within_address_space;
1102 uint16_t section_index = phys_section_add(&d->map, section);
1103 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1104 TARGET_PAGE_BITS));
1106 assert(num_pages);
1107 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1110 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1112 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1113 AddressSpaceDispatch *d = as->next_dispatch;
1114 MemoryRegionSection now = *section, remain = *section;
1115 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1117 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1118 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1119 - now.offset_within_address_space;
1121 now.size = int128_min(int128_make64(left), now.size);
1122 register_subpage(d, &now);
1123 } else {
1124 now.size = int128_zero();
1126 while (int128_ne(remain.size, now.size)) {
1127 remain.size = int128_sub(remain.size, now.size);
1128 remain.offset_within_address_space += int128_get64(now.size);
1129 remain.offset_within_region += int128_get64(now.size);
1130 now = remain;
1131 if (int128_lt(remain.size, page_size)) {
1132 register_subpage(d, &now);
1133 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1134 now.size = page_size;
1135 register_subpage(d, &now);
1136 } else {
1137 now.size = int128_and(now.size, int128_neg(page_size));
1138 register_multipage(d, &now);
1143 void qemu_flush_coalesced_mmio_buffer(void)
1145 if (kvm_enabled())
1146 kvm_flush_coalesced_mmio_buffer();
1149 void qemu_mutex_lock_ramlist(void)
1151 qemu_mutex_lock(&ram_list.mutex);
1154 void qemu_mutex_unlock_ramlist(void)
1156 qemu_mutex_unlock(&ram_list.mutex);
1159 #ifdef __linux__
1161 #include <sys/vfs.h>
1163 #define HUGETLBFS_MAGIC 0x958458f6
1165 static long gethugepagesize(const char *path, Error **errp)
1167 struct statfs fs;
1168 int ret;
1170 do {
1171 ret = statfs(path, &fs);
1172 } while (ret != 0 && errno == EINTR);
1174 if (ret != 0) {
1175 error_setg_errno(errp, errno, "failed to get page size of file %s",
1176 path);
1177 return 0;
1180 if (fs.f_type != HUGETLBFS_MAGIC)
1181 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1183 return fs.f_bsize;
1186 static void *file_ram_alloc(RAMBlock *block,
1187 ram_addr_t memory,
1188 const char *path,
1189 Error **errp)
1191 char *filename;
1192 char *sanitized_name;
1193 char *c;
1194 void *area;
1195 int fd;
1196 uint64_t hpagesize;
1197 Error *local_err = NULL;
1199 hpagesize = gethugepagesize(path, &local_err);
1200 if (local_err) {
1201 error_propagate(errp, local_err);
1202 goto error;
1204 block->mr->align = hpagesize;
1206 if (memory < hpagesize) {
1207 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1208 "or larger than huge page size 0x%" PRIx64,
1209 memory, hpagesize);
1210 goto error;
1213 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1214 error_setg(errp,
1215 "host lacks kvm mmu notifiers, -mem-path unsupported");
1216 goto error;
1219 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1220 sanitized_name = g_strdup(memory_region_name(block->mr));
1221 for (c = sanitized_name; *c != '\0'; c++) {
1222 if (*c == '/')
1223 *c = '_';
1226 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1227 sanitized_name);
1228 g_free(sanitized_name);
1230 fd = mkstemp(filename);
1231 if (fd < 0) {
1232 error_setg_errno(errp, errno,
1233 "unable to create backing store for hugepages");
1234 g_free(filename);
1235 goto error;
1237 unlink(filename);
1238 g_free(filename);
1240 memory = ROUND_UP(memory, hpagesize);
1243 * ftruncate is not supported by hugetlbfs in older
1244 * hosts, so don't bother bailing out on errors.
1245 * If anything goes wrong with it under other filesystems,
1246 * mmap will fail.
1248 if (ftruncate(fd, memory)) {
1249 perror("ftruncate");
1252 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1253 if (area == MAP_FAILED) {
1254 error_setg_errno(errp, errno,
1255 "unable to map backing store for hugepages");
1256 close(fd);
1257 goto error;
1260 if (mem_prealloc) {
1261 os_mem_prealloc(fd, area, memory);
1264 block->fd = fd;
1265 return area;
1267 error:
1268 if (mem_prealloc) {
1269 error_report("%s", error_get_pretty(*errp));
1270 exit(1);
1272 return NULL;
1274 #endif
1276 /* Called with the ramlist lock held. */
1277 static ram_addr_t find_ram_offset(ram_addr_t size)
1279 RAMBlock *block, *next_block;
1280 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1282 assert(size != 0); /* it would hand out same offset multiple times */
1284 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1285 return 0;
1288 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1289 ram_addr_t end, next = RAM_ADDR_MAX;
1291 end = block->offset + block->max_length;
1293 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1294 if (next_block->offset >= end) {
1295 next = MIN(next, next_block->offset);
1298 if (next - end >= size && next - end < mingap) {
1299 offset = end;
1300 mingap = next - end;
1304 if (offset == RAM_ADDR_MAX) {
1305 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1306 (uint64_t)size);
1307 abort();
1310 return offset;
1313 ram_addr_t last_ram_offset(void)
1315 RAMBlock *block;
1316 ram_addr_t last = 0;
1318 rcu_read_lock();
1319 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1320 last = MAX(last, block->offset + block->max_length);
1322 rcu_read_unlock();
1323 return last;
1326 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1328 int ret;
1330 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1331 if (!machine_dump_guest_core(current_machine)) {
1332 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1333 if (ret) {
1334 perror("qemu_madvise");
1335 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1336 "but dump_guest_core=off specified\n");
1341 /* Called within an RCU critical section, or while the ramlist lock
1342 * is held.
1344 static RAMBlock *find_ram_block(ram_addr_t addr)
1346 RAMBlock *block;
1348 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1349 if (block->offset == addr) {
1350 return block;
1354 return NULL;
1357 /* Called with iothread lock held. */
1358 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1360 RAMBlock *new_block, *block;
1362 rcu_read_lock();
1363 new_block = find_ram_block(addr);
1364 assert(new_block);
1365 assert(!new_block->idstr[0]);
1367 if (dev) {
1368 char *id = qdev_get_dev_path(dev);
1369 if (id) {
1370 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1371 g_free(id);
1374 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1376 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1377 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1378 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1379 new_block->idstr);
1380 abort();
1383 rcu_read_unlock();
1386 /* Called with iothread lock held. */
1387 void qemu_ram_unset_idstr(ram_addr_t addr)
1389 RAMBlock *block;
1391 /* FIXME: arch_init.c assumes that this is not called throughout
1392 * migration. Ignore the problem since hot-unplug during migration
1393 * does not work anyway.
1396 rcu_read_lock();
1397 block = find_ram_block(addr);
1398 if (block) {
1399 memset(block->idstr, 0, sizeof(block->idstr));
1401 rcu_read_unlock();
1404 static int memory_try_enable_merging(void *addr, size_t len)
1406 if (!machine_mem_merge(current_machine)) {
1407 /* disabled by the user */
1408 return 0;
1411 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1414 /* Only legal before guest might have detected the memory size: e.g. on
1415 * incoming migration, or right after reset.
1417 * As the memory core doesn't know how the memory is accessed, it is up to the
1418 * resize callback to update device state and/or add assertions to detect
1419 * misuse, if necessary.
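* An illustrative (hypothetical) use: a device allocates its RAM with
* qemu_ram_alloc_resizeable(size, maxsz, resized_cb, mr, &errp) and has
* resized_cb() update the device's own notion of the usable length.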
1421 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1423 RAMBlock *block = find_ram_block(base);
1425 assert(block);
1427 newsize = TARGET_PAGE_ALIGN(newsize);
1429 if (block->used_length == newsize) {
1430 return 0;
1433 if (!(block->flags & RAM_RESIZEABLE)) {
1434 error_setg_errno(errp, EINVAL,
1435 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1436 " in != 0x" RAM_ADDR_FMT, block->idstr,
1437 newsize, block->used_length);
1438 return -EINVAL;
1441 if (block->max_length < newsize) {
1442 error_setg_errno(errp, EINVAL,
1443 "Length too large: %s: 0x" RAM_ADDR_FMT
1444 " > 0x" RAM_ADDR_FMT, block->idstr,
1445 newsize, block->max_length);
1446 return -EINVAL;
1449 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1450 block->used_length = newsize;
1451 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1452 DIRTY_CLIENTS_ALL);
1453 memory_region_set_size(block->mr, newsize);
1454 if (block->resized) {
1455 block->resized(block->idstr, newsize, block->host);
1457 return 0;
1460 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1462 RAMBlock *block;
1463 RAMBlock *last_block = NULL;
1464 ram_addr_t old_ram_size, new_ram_size;
1466 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1468 qemu_mutex_lock_ramlist();
1469 new_block->offset = find_ram_offset(new_block->max_length);
1471 if (!new_block->host) {
1472 if (xen_enabled()) {
1473 xen_ram_alloc(new_block->offset, new_block->max_length,
1474 new_block->mr);
1475 } else {
1476 new_block->host = phys_mem_alloc(new_block->max_length,
1477 &new_block->mr->align);
1478 if (!new_block->host) {
1479 error_setg_errno(errp, errno,
1480 "cannot set up guest memory '%s'",
1481 memory_region_name(new_block->mr));
1482 qemu_mutex_unlock_ramlist();
1483 return -1;
1485 memory_try_enable_merging(new_block->host, new_block->max_length);
1489 new_ram_size = MAX(old_ram_size,
1490 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1491 if (new_ram_size > old_ram_size) {
1492 migration_bitmap_extend(old_ram_size, new_ram_size);
1494 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1495 * QLIST (which has an RCU-friendly variant) does not have insertion at
1496 * tail, so save the last element in last_block.
1498 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1499 last_block = block;
1500 if (block->max_length < new_block->max_length) {
1501 break;
1504 if (block) {
1505 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1506 } else if (last_block) {
1507 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1508 } else { /* list is empty */
1509 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1511 ram_list.mru_block = NULL;
1513 /* Write list before version */
1514 smp_wmb();
1515 ram_list.version++;
1516 qemu_mutex_unlock_ramlist();
1518 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1520 if (new_ram_size > old_ram_size) {
1521 int i;
1523 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1524 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1525 ram_list.dirty_memory[i] =
1526 bitmap_zero_extend(ram_list.dirty_memory[i],
1527 old_ram_size, new_ram_size);
1530 cpu_physical_memory_set_dirty_range(new_block->offset,
1531 new_block->used_length,
1532 DIRTY_CLIENTS_ALL);
1534 if (new_block->host) {
1535 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1536 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1537 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1538 if (kvm_enabled()) {
1539 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1543 return new_block->offset;
1546 #ifdef __linux__
1547 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1548 bool share, const char *mem_path,
1549 Error **errp)
1551 RAMBlock *new_block;
1552 ram_addr_t addr;
1553 Error *local_err = NULL;
1555 if (xen_enabled()) {
1556 error_setg(errp, "-mem-path not supported with Xen");
1557 return -1;
1560 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1562 * file_ram_alloc() needs to allocate just like
1563 * phys_mem_alloc, but we haven't bothered to provide
1564 * a hook there.
1566 error_setg(errp,
1567 "-mem-path not supported with this accelerator");
1568 return -1;
1571 size = TARGET_PAGE_ALIGN(size);
1572 new_block = g_malloc0(sizeof(*new_block));
1573 new_block->mr = mr;
1574 new_block->used_length = size;
1575 new_block->max_length = size;
1576 new_block->flags = share ? RAM_SHARED : 0;
1577 new_block->flags |= RAM_FILE;
1578 new_block->host = file_ram_alloc(new_block, size,
1579 mem_path, errp);
1580 if (!new_block->host) {
1581 g_free(new_block);
1582 return -1;
1585 addr = ram_block_add(new_block, &local_err);
1586 if (local_err) {
1587 g_free(new_block);
1588 error_propagate(errp, local_err);
1589 return -1;
1591 return addr;
1593 #endif
1595 static
1596 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1597 void (*resized)(const char*,
1598 uint64_t length,
1599 void *host),
1600 void *host, bool resizeable,
1601 MemoryRegion *mr, Error **errp)
1603 RAMBlock *new_block;
1604 ram_addr_t addr;
1605 Error *local_err = NULL;
1607 size = TARGET_PAGE_ALIGN(size);
1608 max_size = TARGET_PAGE_ALIGN(max_size);
1609 new_block = g_malloc0(sizeof(*new_block));
1610 new_block->mr = mr;
1611 new_block->resized = resized;
1612 new_block->used_length = size;
1613 new_block->max_length = max_size;
1614 assert(max_size >= size);
1615 new_block->fd = -1;
1616 new_block->host = host;
1617 if (host) {
1618 new_block->flags |= RAM_PREALLOC;
1620 if (resizeable) {
1621 new_block->flags |= RAM_RESIZEABLE;
1623 addr = ram_block_add(new_block, &local_err);
1624 if (local_err) {
1625 g_free(new_block);
1626 error_propagate(errp, local_err);
1627 return -1;
1629 return addr;
1632 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1633 MemoryRegion *mr, Error **errp)
1635 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1638 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1640 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1643 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1644 void (*resized)(const char*,
1645 uint64_t length,
1646 void *host),
1647 MemoryRegion *mr, Error **errp)
1649 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1652 void qemu_ram_free_from_ptr(ram_addr_t addr)
1654 RAMBlock *block;
1656 qemu_mutex_lock_ramlist();
1657 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1658 if (addr == block->offset) {
1659 QLIST_REMOVE_RCU(block, next);
1660 ram_list.mru_block = NULL;
1661 /* Write list before version */
1662 smp_wmb();
1663 ram_list.version++;
1664 g_free_rcu(block, rcu);
1665 break;
1668 qemu_mutex_unlock_ramlist();
1671 static void reclaim_ramblock(RAMBlock *block)
1673 if (block->flags & RAM_PREALLOC) {
1675 } else if (xen_enabled()) {
1676 xen_invalidate_map_cache_entry(block->host);
1677 #ifndef _WIN32
1678 } else if (block->fd >= 0) {
1679 if (block->flags & RAM_FILE) {
1680 qemu_ram_munmap(block->host, block->max_length);
1681 } else {
1682 munmap(block->host, block->max_length);
1684 close(block->fd);
1685 #endif
1686 } else {
1687 qemu_anon_ram_free(block->host, block->max_length);
1689 g_free(block);
1692 void qemu_ram_free(ram_addr_t addr)
1694 RAMBlock *block;
1696 qemu_mutex_lock_ramlist();
1697 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1698 if (addr == block->offset) {
1699 QLIST_REMOVE_RCU(block, next);
1700 ram_list.mru_block = NULL;
1701 /* Write list before version */
1702 smp_wmb();
1703 ram_list.version++;
1704 call_rcu(block, reclaim_ramblock, rcu);
1705 break;
1708 qemu_mutex_unlock_ramlist();
1711 #ifndef _WIN32
1712 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1714 RAMBlock *block;
1715 ram_addr_t offset;
1716 int flags;
1717 void *area, *vaddr;
1719 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1720 offset = addr - block->offset;
1721 if (offset < block->max_length) {
1722 vaddr = ramblock_ptr(block, offset);
1723 if (block->flags & RAM_PREALLOC) {
1725 } else if (xen_enabled()) {
1726 abort();
1727 } else {
1728 flags = MAP_FIXED;
1729 if (block->fd >= 0) {
1730 flags |= (block->flags & RAM_SHARED ?
1731 MAP_SHARED : MAP_PRIVATE);
1732 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1733 flags, block->fd, offset);
1734 } else {
1736 * Remap needs to match alloc. Accelerators that
1737 * set phys_mem_alloc never remap. If they did,
1738 * we'd need a remap hook here.
1740 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1742 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1743 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1744 flags, -1, 0);
1746 if (area != vaddr) {
1747 fprintf(stderr, "Could not remap addr: "
1748 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1749 length, addr);
1750 exit(1);
1752 memory_try_enable_merging(vaddr, length);
1753 qemu_ram_setup_dump(vaddr, length);
1758 #endif /* !_WIN32 */
1760 int qemu_get_ram_fd(ram_addr_t addr)
1762 RAMBlock *block;
1763 int fd;
1765 rcu_read_lock();
1766 block = qemu_get_ram_block(addr);
1767 fd = block->fd;
1768 rcu_read_unlock();
1769 return fd;
1772 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1774 RAMBlock *block;
1775 void *ptr;
1777 rcu_read_lock();
1778 block = qemu_get_ram_block(addr);
1779 ptr = ramblock_ptr(block, 0);
1780 rcu_read_unlock();
1781 return ptr;
1784 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1785 * This should not be used for general purpose DMA. Use address_space_map
1786 * or address_space_rw instead. For local memory (e.g. video ram) that the
1787 * device owns, use memory_region_get_ram_ptr.
1789 * By the time this function returns, the returned pointer is not protected
1790 * by RCU anymore. If the caller is not within an RCU critical section and
1791 * does not hold the iothread lock, it must have other means of protecting the
1792 * pointer, such as a reference to the region that includes the incoming
1793 * ram_addr_t.
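* An illustrative pattern (not part of the original comment) for a caller
* that holds neither the iothread lock nor a reference to the region:
*
*     rcu_read_lock();
*     ptr = qemu_get_ram_ptr(addr);
*     ... use ptr ...
*     rcu_read_unlock();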
1795 void *qemu_get_ram_ptr(ram_addr_t addr)
1797 RAMBlock *block;
1798 void *ptr;
1800 rcu_read_lock();
1801 block = qemu_get_ram_block(addr);
1803 if (xen_enabled() && block->host == NULL) {
1804 /* We need to check if the requested address is in the RAM
1805 * because we don't want to map the entire memory in QEMU.
1806 * In that case just map until the end of the page.
1808 if (block->offset == 0) {
1809 ptr = xen_map_cache(addr, 0, 0);
1810 goto unlock;
1813 block->host = xen_map_cache(block->offset, block->max_length, 1);
1815 ptr = ramblock_ptr(block, addr - block->offset);
1817 unlock:
1818 rcu_read_unlock();
1819 return ptr;
1822 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1823 * but takes a size argument.
1825 * By the time this function returns, the returned pointer is not protected
1826 * by RCU anymore. If the caller is not within an RCU critical section and
1827 * does not hold the iothread lock, it must have other means of protecting the
1828 * pointer, such as a reference to the region that includes the incoming
1829 * ram_addr_t.
1831 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1833 void *ptr;
1834 if (*size == 0) {
1835 return NULL;
1837 if (xen_enabled()) {
1838 return xen_map_cache(addr, *size, 1);
1839 } else {
1840 RAMBlock *block;
1841 rcu_read_lock();
1842 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1843 if (addr - block->offset < block->max_length) {
1844 if (addr - block->offset + *size > block->max_length)
1845 *size = block->max_length - addr + block->offset;
1846 ptr = ramblock_ptr(block, addr - block->offset);
1847 rcu_read_unlock();
1848 return ptr;
1852 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1853 abort();
1857 /* Some of the softmmu routines need to translate from a host pointer
1858 * (typically a TLB entry) back to a ram offset.
1860 * By the time this function returns, the returned pointer is not protected
1861 * by RCU anymore. If the caller is not within an RCU critical section and
1862 * does not hold the iothread lock, it must have other means of protecting the
1863 * pointer, such as a reference to the region that includes the incoming
1864 * ram_addr_t.
1866 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1868 RAMBlock *block;
1869 uint8_t *host = ptr;
1870 MemoryRegion *mr;
1872 if (xen_enabled()) {
1873 rcu_read_lock();
1874 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1875 mr = qemu_get_ram_block(*ram_addr)->mr;
1876 rcu_read_unlock();
1877 return mr;
1880 rcu_read_lock();
1881 block = atomic_rcu_read(&ram_list.mru_block);
1882 if (block && block->host && host - block->host < block->max_length) {
1883 goto found;
1886 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1887 /* This can happen when the block is not mapped. */
1888 if (block->host == NULL) {
1889 continue;
1891 if (host - block->host < block->max_length) {
1892 goto found;
1896 rcu_read_unlock();
1897 return NULL;
1899 found:
1900 *ram_addr = block->offset + (host - block->host);
1901 mr = block->mr;
1902 rcu_read_unlock();
1903 return mr;
1906 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1907 uint64_t val, unsigned size)
1909 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1910 tb_invalidate_phys_page_fast(ram_addr, size);
1912 switch (size) {
1913 case 1:
1914 stb_p(qemu_get_ram_ptr(ram_addr), val);
1915 break;
1916 case 2:
1917 stw_p(qemu_get_ram_ptr(ram_addr), val);
1918 break;
1919 case 4:
1920 stl_p(qemu_get_ram_ptr(ram_addr), val);
1921 break;
1922 default:
1923 abort();
1925 /* Set both VGA and migration bits for simplicity and to remove
1926 * the notdirty callback faster.
1928 cpu_physical_memory_set_dirty_range(ram_addr, size,
1929 DIRTY_CLIENTS_NOCODE);
1930 /* we remove the notdirty callback only if the code has been
1931 flushed */
1932 if (!cpu_physical_memory_is_clean(ram_addr)) {
1933 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
1937 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1938 unsigned size, bool is_write)
1940 return is_write;
1943 static const MemoryRegionOps notdirty_mem_ops = {
1944 .write = notdirty_mem_write,
1945 .valid.accepts = notdirty_mem_accepts,
1946 .endianness = DEVICE_NATIVE_ENDIAN,
1949 /* Generate a debug exception if a watchpoint has been hit. */
1950 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1952 CPUState *cpu = current_cpu;
1953 CPUArchState *env = cpu->env_ptr;
1954 target_ulong pc, cs_base;
1955 target_ulong vaddr;
1956 CPUWatchpoint *wp;
1957 int cpu_flags;
1959 if (cpu->watchpoint_hit) {
1960 /* We re-entered the check after replacing the TB. Now raise
1961 * the debug interrupt so that it will trigger after the
1962 * current instruction. */
1963 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1964 return;
1966 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1967 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1968 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1969 && (wp->flags & flags)) {
1970 if (flags == BP_MEM_READ) {
1971 wp->flags |= BP_WATCHPOINT_HIT_READ;
1972 } else {
1973 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1975 wp->hitaddr = vaddr;
1976 wp->hitattrs = attrs;
1977 if (!cpu->watchpoint_hit) {
1978 cpu->watchpoint_hit = wp;
1979 tb_check_watchpoint(cpu);
1980 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1981 cpu->exception_index = EXCP_DEBUG;
1982 cpu_loop_exit(cpu);
1983 } else {
1984 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1985 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1986 cpu_resume_from_signal(cpu, NULL);
1989 } else {
1990 wp->flags &= ~BP_WATCHPOINT_HIT;
1995 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1996 so these check for a hit then pass through to the normal out-of-line
1997 phys routines. */
1998 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
1999 unsigned size, MemTxAttrs attrs)
2001 MemTxResult res;
2002 uint64_t data;
2004 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2005 switch (size) {
2006 case 1:
2007 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2008 break;
2009 case 2:
2010 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2011 break;
2012 case 4:
2013 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2014 break;
2015 default: abort();
2017 *pdata = data;
2018 return res;
2021 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2022 uint64_t val, unsigned size,
2023 MemTxAttrs attrs)
2025 MemTxResult res;
2027 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2028 switch (size) {
2029 case 1:
2030 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2031 break;
2032 case 2:
2033 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2034 break;
2035 case 4:
2036 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2037 break;
2038 default: abort();
2040 return res;
2043 static const MemoryRegionOps watch_mem_ops = {
2044 .read_with_attrs = watch_mem_read,
2045 .write_with_attrs = watch_mem_write,
2046 .endianness = DEVICE_NATIVE_ENDIAN,
2049 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2050 unsigned len, MemTxAttrs attrs)
2052 subpage_t *subpage = opaque;
2053 uint8_t buf[8];
2054 MemTxResult res;
2056 #if defined(DEBUG_SUBPAGE)
2057 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2058 subpage, len, addr);
2059 #endif
2060 res = address_space_read(subpage->as, addr + subpage->base,
2061 attrs, buf, len);
2062 if (res) {
2063 return res;
2065 switch (len) {
2066 case 1:
2067 *data = ldub_p(buf);
2068 return MEMTX_OK;
2069 case 2:
2070 *data = lduw_p(buf);
2071 return MEMTX_OK;
2072 case 4:
2073 *data = ldl_p(buf);
2074 return MEMTX_OK;
2075 case 8:
2076 *data = ldq_p(buf);
2077 return MEMTX_OK;
2078 default:
2079 abort();
2083 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2084 uint64_t value, unsigned len, MemTxAttrs attrs)
2086 subpage_t *subpage = opaque;
2087 uint8_t buf[8];
2089 #if defined(DEBUG_SUBPAGE)
2090 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2091 " value %"PRIx64"\n",
2092 __func__, subpage, len, addr, value);
2093 #endif
2094 switch (len) {
2095 case 1:
2096 stb_p(buf, value);
2097 break;
2098 case 2:
2099 stw_p(buf, value);
2100 break;
2101 case 4:
2102 stl_p(buf, value);
2103 break;
2104 case 8:
2105 stq_p(buf, value);
2106 break;
2107 default:
2108 abort();
2110 return address_space_write(subpage->as, addr + subpage->base,
2111 attrs, buf, len);
2114 static bool subpage_accepts(void *opaque, hwaddr addr,
2115 unsigned len, bool is_write)
2117 subpage_t *subpage = opaque;
2118 #if defined(DEBUG_SUBPAGE)
2119 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2120 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2121 #endif
2123 return address_space_access_valid(subpage->as, addr + subpage->base,
2124 len, is_write);
2127 static const MemoryRegionOps subpage_ops = {
2128 .read_with_attrs = subpage_read,
2129 .write_with_attrs = subpage_write,
2130 .impl.min_access_size = 1,
2131 .impl.max_access_size = 8,
2132 .valid.min_access_size = 1,
2133 .valid.max_access_size = 8,
2134 .valid.accepts = subpage_accepts,
2135 .endianness = DEVICE_NATIVE_ENDIAN,
2138 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2139 uint16_t section)
2141 int idx, eidx;
2143 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2144 return -1;
2145 idx = SUBPAGE_IDX(start);
2146 eidx = SUBPAGE_IDX(end);
2147 #if defined(DEBUG_SUBPAGE)
2148 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2149 __func__, mmio, start, end, idx, eidx, section);
2150 #endif
2151 for (; idx <= eidx; idx++) {
2152 mmio->sub_section[idx] = section;
2155 return 0;
2158 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2160 subpage_t *mmio;
2162 mmio = g_malloc0(sizeof(subpage_t));
2164 mmio->as = as;
2165 mmio->base = base;
2166 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2167 NULL, TARGET_PAGE_SIZE);
2168 mmio->iomem.subpage = true;
2169 #if defined(DEBUG_SUBPAGE)
2170 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2171 mmio, base, TARGET_PAGE_SIZE);
2172 #endif
2173 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2175 return mmio;
2176 }
2178 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2179 MemoryRegion *mr)
2180 {
2181 assert(as);
2182 MemoryRegionSection section = {
2183 .address_space = as,
2184 .mr = mr,
2185 .offset_within_address_space = 0,
2186 .offset_within_region = 0,
2187 .size = int128_2_64(),
2188 };
2190 return phys_section_add(map, &section);
2191 }
2193 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2194 {
2195 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2196 MemoryRegionSection *sections = d->map.sections;
2198 return sections[index & ~TARGET_PAGE_MASK].mr;
2199 }
2201 static void io_mem_init(void)
2202 {
2203 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2204 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2205 NULL, UINT64_MAX);
2206 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2207 NULL, UINT64_MAX);
2208 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2209 NULL, UINT64_MAX);
2210 }
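/*
 * mem_begin/mem_commit below drive the per-AddressSpace dispatch table
 * through the MemoryListener interface: begin allocates a fresh
 * AddressSpaceDispatch seeded with the fixed unassigned/notdirty/rom/watch
 * sections, and commit publishes it with atomic_rcu_set(), releasing the
 * previous table only after an RCU grace period.
 */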
2212 static void mem_begin(MemoryListener *listener)
2213 {
2214 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2215 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2216 uint16_t n;
2218 n = dummy_section(&d->map, as, &io_mem_unassigned);
2219 assert(n == PHYS_SECTION_UNASSIGNED);
2220 n = dummy_section(&d->map, as, &io_mem_notdirty);
2221 assert(n == PHYS_SECTION_NOTDIRTY);
2222 n = dummy_section(&d->map, as, &io_mem_rom);
2223 assert(n == PHYS_SECTION_ROM);
2224 n = dummy_section(&d->map, as, &io_mem_watch);
2225 assert(n == PHYS_SECTION_WATCH);
2227 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2228 d->as = as;
2229 as->next_dispatch = d;
2230 }
2232 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2233 {
2234 phys_sections_free(&d->map);
2235 g_free(d);
2236 }
2238 static void mem_commit(MemoryListener *listener)
2239 {
2240 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2241 AddressSpaceDispatch *cur = as->dispatch;
2242 AddressSpaceDispatch *next = as->next_dispatch;
2244 phys_page_compact_all(next, next->map.nodes_nb);
2246 atomic_rcu_set(&as->dispatch, next);
2247 if (cur) {
2248 call_rcu(cur, address_space_dispatch_free, rcu);
2249 }
2250 }
2252 static void tcg_commit(MemoryListener *listener)
2253 {
2254 CPUState *cpu;
2256 /* since each CPU stores ram addresses in its TLB cache, we must
2257 reset the modified entries */
2258 /* XXX: slow ! */
2259 CPU_FOREACH(cpu) {
2260 /* FIXME: Disentangle the cpu.h circular files deps so we can
2261 directly get the right CPU from listener. */
2262 if (cpu->tcg_as_listener != listener) {
2263 continue;
2264 }
2265 cpu_reload_memory_map(cpu);
2266 }
2267 }
2269 void address_space_init_dispatch(AddressSpace *as)
2270 {
2271 as->dispatch = NULL;
2272 as->dispatch_listener = (MemoryListener) {
2273 .begin = mem_begin,
2274 .commit = mem_commit,
2275 .region_add = mem_add,
2276 .region_nop = mem_add,
2277 .priority = 0,
2278 };
2279 memory_listener_register(&as->dispatch_listener, as);
2280 }
2282 void address_space_unregister(AddressSpace *as)
2283 {
2284 memory_listener_unregister(&as->dispatch_listener);
2285 }
2287 void address_space_destroy_dispatch(AddressSpace *as)
2288 {
2289 AddressSpaceDispatch *d = as->dispatch;
2291 atomic_rcu_set(&as->dispatch, NULL);
2292 if (d) {
2293 call_rcu(d, address_space_dispatch_free, rcu);
2294 }
2295 }
2297 static void memory_map_init(void)
2298 {
2299 system_memory = g_malloc(sizeof(*system_memory));
2301 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2302 address_space_init(&address_space_memory, system_memory, "memory");
2304 system_io = g_malloc(sizeof(*system_io));
2305 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2306 65536);
2307 address_space_init(&address_space_io, system_io, "I/O");
2308 }
2310 MemoryRegion *get_system_memory(void)
2311 {
2312 return system_memory;
2313 }
2315 MemoryRegion *get_system_io(void)
2316 {
2317 return system_io;
2318 }
2320 #endif /* !defined(CONFIG_USER_ONLY) */
2322 /* physical memory access (slow version, mainly for debug) */
2323 #if defined(CONFIG_USER_ONLY)
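/*
 * In user-mode emulation guest addresses live in the host address space,
 * so the debug accessor below only has to honour the page protection flags
 * and go through lock_user()/unlock_user() rather than a physical memory
 * map.
 */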
2324 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2325 uint8_t *buf, int len, int is_write)
2326 {
2327 int l, flags;
2328 target_ulong page;
2329 void * p;
2331 while (len > 0) {
2332 page = addr & TARGET_PAGE_MASK;
2333 l = (page + TARGET_PAGE_SIZE) - addr;
2334 if (l > len)
2335 l = len;
2336 flags = page_get_flags(page);
2337 if (!(flags & PAGE_VALID))
2338 return -1;
2339 if (is_write) {
2340 if (!(flags & PAGE_WRITE))
2341 return -1;
2342 /* XXX: this code should not depend on lock_user */
2343 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2344 return -1;
2345 memcpy(p, buf, l);
2346 unlock_user(p, addr, l);
2347 } else {
2348 if (!(flags & PAGE_READ))
2349 return -1;
2350 /* XXX: this code should not depend on lock_user */
2351 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2352 return -1;
2353 memcpy(buf, p, l);
2354 unlock_user(p, addr, 0);
2355 }
2356 len -= l;
2357 buf += l;
2358 addr += l;
2359 }
2360 return 0;
2361 }
2363 #else
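/*
 * invalidate_and_set_dirty() is called after device emulation has written
 * directly into guest RAM: translated code overlapping the range is
 * invalidated, and the remaining dirty bits (VGA, migration) are raised
 * for the clients that still saw the range as clean.
 */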
2365 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2366 hwaddr length)
2367 {
2368 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2369 /* No early return if dirty_log_mask is or becomes 0, because
2370 * cpu_physical_memory_set_dirty_range will still call
2371 * xen_modified_memory.
2372 */
2373 if (dirty_log_mask) {
2374 dirty_log_mask =
2375 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2376 }
2377 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2378 tb_invalidate_phys_range(addr, addr + length);
2379 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2380 }
2381 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2382 }
2384 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2385 {
2386 unsigned access_size_max = mr->ops->valid.max_access_size;
2388 /* Regions are assumed to support 1-4 byte accesses unless
2389 otherwise specified. */
2390 if (access_size_max == 0) {
2391 access_size_max = 4;
2392 }
2394 /* Bound the maximum access by the alignment of the address. */
2395 if (!mr->ops->impl.unaligned) {
2396 unsigned align_size_max = addr & -addr;
2397 if (align_size_max != 0 && align_size_max < access_size_max) {
2398 access_size_max = align_size_max;
2399 }
2400 }
2402 /* Don't attempt accesses larger than the maximum. */
2403 if (l > access_size_max) {
2404 l = access_size_max;
2405 }
2406 l = pow2floor(l);
2408 return l;
2409 }
2411 static bool prepare_mmio_access(MemoryRegion *mr)
2412 {
2413 bool unlocked = !qemu_mutex_iothread_locked();
2414 bool release_lock = false;
2416 if (unlocked && mr->global_locking) {
2417 qemu_mutex_lock_iothread();
2418 unlocked = false;
2419 release_lock = true;
2420 }
2421 if (mr->flush_coalesced_mmio) {
2422 if (unlocked) {
2423 qemu_mutex_lock_iothread();
2424 }
2425 qemu_flush_coalesced_mmio_buffer();
2426 if (unlocked) {
2427 qemu_mutex_unlock_iothread();
2428 }
2429 }
2431 return release_lock;
2432 }
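/*
 * address_space_rw() below is the generic slow path for physical memory
 * access: it translates one MemoryRegion at a time, splits MMIO accesses
 * into 1-8 byte dispatches sized by memory_access_size(), copies direct
 * RAM with memcpy(), and accumulates any MemTxResult failures.  The global
 * lock is taken around MMIO only when the region requires it and it is not
 * already held (see prepare_mmio_access above).
 */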
2434 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2435 uint8_t *buf, int len, bool is_write)
2436 {
2437 hwaddr l;
2438 uint8_t *ptr;
2439 uint64_t val;
2440 hwaddr addr1;
2441 MemoryRegion *mr;
2442 MemTxResult result = MEMTX_OK;
2443 bool release_lock = false;
2445 rcu_read_lock();
2446 while (len > 0) {
2447 l = len;
2448 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2450 if (is_write) {
2451 if (!memory_access_is_direct(mr, is_write)) {
2452 release_lock |= prepare_mmio_access(mr);
2453 l = memory_access_size(mr, l, addr1);
2454 /* XXX: could force current_cpu to NULL to avoid
2455 potential bugs */
2456 switch (l) {
2457 case 8:
2458 /* 64 bit write access */
2459 val = ldq_p(buf);
2460 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2461 attrs);
2462 break;
2463 case 4:
2464 /* 32 bit write access */
2465 val = ldl_p(buf);
2466 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2467 attrs);
2468 break;
2469 case 2:
2470 /* 16 bit write access */
2471 val = lduw_p(buf);
2472 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2473 attrs);
2474 break;
2475 case 1:
2476 /* 8 bit write access */
2477 val = ldub_p(buf);
2478 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2479 attrs);
2480 break;
2481 default:
2482 abort();
2483 }
2484 } else {
2485 addr1 += memory_region_get_ram_addr(mr);
2486 /* RAM case */
2487 ptr = qemu_get_ram_ptr(addr1);
2488 memcpy(ptr, buf, l);
2489 invalidate_and_set_dirty(mr, addr1, l);
2490 }
2491 } else {
2492 if (!memory_access_is_direct(mr, is_write)) {
2493 /* I/O case */
2494 release_lock |= prepare_mmio_access(mr);
2495 l = memory_access_size(mr, l, addr1);
2496 switch (l) {
2497 case 8:
2498 /* 64 bit read access */
2499 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2500 attrs);
2501 stq_p(buf, val);
2502 break;
2503 case 4:
2504 /* 32 bit read access */
2505 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2506 attrs);
2507 stl_p(buf, val);
2508 break;
2509 case 2:
2510 /* 16 bit read access */
2511 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2512 attrs);
2513 stw_p(buf, val);
2514 break;
2515 case 1:
2516 /* 8 bit read access */
2517 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2518 attrs);
2519 stb_p(buf, val);
2520 break;
2521 default:
2522 abort();
2523 }
2524 } else {
2525 /* RAM case */
2526 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2527 memcpy(buf, ptr, l);
2528 }
2529 }
2531 if (release_lock) {
2532 qemu_mutex_unlock_iothread();
2533 release_lock = false;
2534 }
2536 len -= l;
2537 buf += l;
2538 addr += l;
2539 }
2540 rcu_read_unlock();
2542 return result;
2543 }
2545 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2546 const uint8_t *buf, int len)
2547 {
2548 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2549 }
2551 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2552 uint8_t *buf, int len)
2553 {
2554 return address_space_rw(as, addr, attrs, buf, len, false);
2555 }
2558 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2559 int len, int is_write)
2560 {
2561 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2562 buf, len, is_write);
2563 }
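/*
 * The helpers below write through read-only memory so that firmware and
 * kernel images can be loaded into ROM regions: WRITE_DATA copies the
 * payload and marks it dirty, while FLUSH_CACHE only flushes the host
 * instruction cache over the range (needed when KVM or Xen will execute
 * code from that memory).
 */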
2565 enum write_rom_type {
2566 WRITE_DATA,
2567 FLUSH_CACHE,
2568 };
2570 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2571 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2572 {
2573 hwaddr l;
2574 uint8_t *ptr;
2575 hwaddr addr1;
2576 MemoryRegion *mr;
2578 rcu_read_lock();
2579 while (len > 0) {
2580 l = len;
2581 mr = address_space_translate(as, addr, &addr1, &l, true);
2583 if (!(memory_region_is_ram(mr) ||
2584 memory_region_is_romd(mr))) {
2585 l = memory_access_size(mr, l, addr1);
2586 } else {
2587 addr1 += memory_region_get_ram_addr(mr);
2588 /* ROM/RAM case */
2589 ptr = qemu_get_ram_ptr(addr1);
2590 switch (type) {
2591 case WRITE_DATA:
2592 memcpy(ptr, buf, l);
2593 invalidate_and_set_dirty(mr, addr1, l);
2594 break;
2595 case FLUSH_CACHE:
2596 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2597 break;
2598 }
2599 }
2600 len -= l;
2601 buf += l;
2602 addr += l;
2603 }
2604 rcu_read_unlock();
2605 }
2607 /* used for ROM loading : can write in RAM and ROM */
2608 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2609 const uint8_t *buf, int len)
2610 {
2611 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2612 }
2614 void cpu_flush_icache_range(hwaddr start, int len)
2615 {
2616 /*
2617 * This function should do the same thing as an icache flush that was
2618 * triggered from within the guest. For TCG we are always cache coherent,
2619 * so there is no need to flush anything. For KVM / Xen we need to flush
2620 * the host's instruction cache at least.
2621 */
2622 if (tcg_enabled()) {
2623 return;
2624 }
2626 cpu_physical_memory_write_rom_internal(&address_space_memory,
2627 start, NULL, len, FLUSH_CACHE);
2628 }
2630 typedef struct {
2631 MemoryRegion *mr;
2632 void *buffer;
2633 hwaddr addr;
2634 hwaddr len;
2635 bool in_use;
2636 } BounceBuffer;
2638 static BounceBuffer bounce;
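/*
 * There is a single global bounce buffer, so at most one bounce-buffered
 * mapping can be outstanding at any time.  When address_space_map() finds
 * it busy it returns NULL, and callers can use cpu_register_map_client()
 * to queue a bottom half that is scheduled once the buffer is released.
 */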
2640 typedef struct MapClient {
2641 QEMUBH *bh;
2642 QLIST_ENTRY(MapClient) link;
2643 } MapClient;
2645 QemuMutex map_client_list_lock;
2646 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2647 = QLIST_HEAD_INITIALIZER(map_client_list);
2649 static void cpu_unregister_map_client_do(MapClient *client)
2651 QLIST_REMOVE(client, link);
2652 g_free(client);
2655 static void cpu_notify_map_clients_locked(void)
2657 MapClient *client;
2659 while (!QLIST_EMPTY(&map_client_list)) {
2660 client = QLIST_FIRST(&map_client_list);
2661 qemu_bh_schedule(client->bh);
2662 cpu_unregister_map_client_do(client);
2666 void cpu_register_map_client(QEMUBH *bh)
2668 MapClient *client = g_malloc(sizeof(*client));
2670 qemu_mutex_lock(&map_client_list_lock);
2671 client->bh = bh;
2672 QLIST_INSERT_HEAD(&map_client_list, client, link);
2673 if (!atomic_read(&bounce.in_use)) {
2674 cpu_notify_map_clients_locked();
2676 qemu_mutex_unlock(&map_client_list_lock);
2679 void cpu_exec_init_all(void)
2681 qemu_mutex_init(&ram_list.mutex);
2682 memory_map_init();
2683 io_mem_init();
2684 qemu_mutex_init(&map_client_list_lock);
2687 void cpu_unregister_map_client(QEMUBH *bh)
2689 MapClient *client;
2691 qemu_mutex_lock(&map_client_list_lock);
2692 QLIST_FOREACH(client, &map_client_list, link) {
2693 if (client->bh == bh) {
2694 cpu_unregister_map_client_do(client);
2695 break;
2698 qemu_mutex_unlock(&map_client_list_lock);
2701 static void cpu_notify_map_clients(void)
2703 qemu_mutex_lock(&map_client_list_lock);
2704 cpu_notify_map_clients_locked();
2705 qemu_mutex_unlock(&map_client_list_lock);
2708 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2709 {
2710 MemoryRegion *mr;
2711 hwaddr l, xlat;
2713 rcu_read_lock();
2714 while (len > 0) {
2715 l = len;
2716 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2717 if (!memory_access_is_direct(mr, is_write)) {
2718 l = memory_access_size(mr, l, addr);
2719 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2720 return false;
2721 }
2722 }
2724 len -= l;
2725 addr += l;
2726 }
2727 rcu_read_unlock();
2728 return true;
2729 }
2731 /* Map a physical memory region into a host virtual address.
2732 * May map a subset of the requested range, given by and returned in *plen.
2733 * May return NULL if resources needed to perform the mapping are exhausted.
2734 * Use only for reads OR writes - not for read-modify-write operations.
2735 * Use cpu_register_map_client() to know when retrying the map operation is
2736 * likely to succeed.
2737 */
2738 void *address_space_map(AddressSpace *as,
2739 hwaddr addr,
2740 hwaddr *plen,
2741 bool is_write)
2742 {
2743 hwaddr len = *plen;
2744 hwaddr done = 0;
2745 hwaddr l, xlat, base;
2746 MemoryRegion *mr, *this_mr;
2747 ram_addr_t raddr;
2749 if (len == 0) {
2750 return NULL;
2751 }
2753 l = len;
2754 rcu_read_lock();
2755 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2757 if (!memory_access_is_direct(mr, is_write)) {
2758 if (atomic_xchg(&bounce.in_use, true)) {
2759 rcu_read_unlock();
2760 return NULL;
2761 }
2762 /* Avoid unbounded allocations */
2763 l = MIN(l, TARGET_PAGE_SIZE);
2764 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2765 bounce.addr = addr;
2766 bounce.len = l;
2768 memory_region_ref(mr);
2769 bounce.mr = mr;
2770 if (!is_write) {
2771 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2772 bounce.buffer, l);
2773 }
2775 rcu_read_unlock();
2776 *plen = l;
2777 return bounce.buffer;
2778 }
2780 base = xlat;
2781 raddr = memory_region_get_ram_addr(mr);
2783 for (;;) {
2784 len -= l;
2785 addr += l;
2786 done += l;
2787 if (len == 0) {
2788 break;
2789 }
2791 l = len;
2792 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2793 if (this_mr != mr || xlat != base + done) {
2794 break;
2795 }
2796 }
2798 memory_region_ref(mr);
2799 rcu_read_unlock();
2800 *plen = done;
2801 return qemu_ram_ptr_length(raddr + base, plen);
2802 }
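/*
 * Illustrative use of the map/unmap API (a sketch, not code from this
 * file): callers normally try the direct mapping and then check how much
 * was actually mapped, e.g.
 *
 *     hwaddr l = size;
 *     void *p = address_space_map(as, addr, &l, is_write);
 *     if (p) {
 *         ...access up to l bytes at p...
 *         address_space_unmap(as, p, l, is_write, l);
 *     }
 *
 * l may come back smaller than size, and p may be the bounce buffer.
 */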
2804 /* Unmaps a memory region previously mapped by address_space_map().
2805 * Will also mark the memory as dirty if is_write == 1. access_len gives
2806 * the amount of memory that was actually read or written by the caller.
2807 */
2808 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2809 int is_write, hwaddr access_len)
2811 if (buffer != bounce.buffer) {
2812 MemoryRegion *mr;
2813 ram_addr_t addr1;
2815 mr = qemu_ram_addr_from_host(buffer, &addr1);
2816 assert(mr != NULL);
2817 if (is_write) {
2818 invalidate_and_set_dirty(mr, addr1, access_len);
2820 if (xen_enabled()) {
2821 xen_invalidate_map_cache_entry(buffer);
2823 memory_region_unref(mr);
2824 return;
2826 if (is_write) {
2827 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2828 bounce.buffer, access_len);
2830 qemu_vfree(bounce.buffer);
2831 bounce.buffer = NULL;
2832 memory_region_unref(bounce.mr);
2833 atomic_mb_set(&bounce.in_use, false);
2834 cpu_notify_map_clients();
2837 void *cpu_physical_memory_map(hwaddr addr,
2838 hwaddr *plen,
2839 int is_write)
2840 {
2841 return address_space_map(&address_space_memory, addr, plen, is_write);
2842 }
2844 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2845 int is_write, hwaddr access_len)
2846 {
2847 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2848 }
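/*
 * The address_space_ldX and address_space_stX helpers below are fixed-size,
 * aligned variants of address_space_rw: each translates the address once,
 * either dispatches a single MMIO access or touches RAM directly, applies
 * the requested endianness, and reports the MemTx status through *result
 * (callers may pass NULL when they do not care).
 */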
2850 /* warning: addr must be aligned */
2851 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2852 MemTxAttrs attrs,
2853 MemTxResult *result,
2854 enum device_endian endian)
2856 uint8_t *ptr;
2857 uint64_t val;
2858 MemoryRegion *mr;
2859 hwaddr l = 4;
2860 hwaddr addr1;
2861 MemTxResult r;
2862 bool release_lock = false;
2864 rcu_read_lock();
2865 mr = address_space_translate(as, addr, &addr1, &l, false);
2866 if (l < 4 || !memory_access_is_direct(mr, false)) {
2867 release_lock |= prepare_mmio_access(mr);
2869 /* I/O case */
2870 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2871 #if defined(TARGET_WORDS_BIGENDIAN)
2872 if (endian == DEVICE_LITTLE_ENDIAN) {
2873 val = bswap32(val);
2875 #else
2876 if (endian == DEVICE_BIG_ENDIAN) {
2877 val = bswap32(val);
2879 #endif
2880 } else {
2881 /* RAM case */
2882 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2883 & TARGET_PAGE_MASK)
2884 + addr1);
2885 switch (endian) {
2886 case DEVICE_LITTLE_ENDIAN:
2887 val = ldl_le_p(ptr);
2888 break;
2889 case DEVICE_BIG_ENDIAN:
2890 val = ldl_be_p(ptr);
2891 break;
2892 default:
2893 val = ldl_p(ptr);
2894 break;
2896 r = MEMTX_OK;
2898 if (result) {
2899 *result = r;
2901 if (release_lock) {
2902 qemu_mutex_unlock_iothread();
2904 rcu_read_unlock();
2905 return val;
2908 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2909 MemTxAttrs attrs, MemTxResult *result)
2911 return address_space_ldl_internal(as, addr, attrs, result,
2912 DEVICE_NATIVE_ENDIAN);
2915 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2916 MemTxAttrs attrs, MemTxResult *result)
2918 return address_space_ldl_internal(as, addr, attrs, result,
2919 DEVICE_LITTLE_ENDIAN);
2922 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2923 MemTxAttrs attrs, MemTxResult *result)
2925 return address_space_ldl_internal(as, addr, attrs, result,
2926 DEVICE_BIG_ENDIAN);
2929 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2931 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2934 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2936 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2939 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2941 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2944 /* warning: addr must be aligned */
2945 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2946 MemTxAttrs attrs,
2947 MemTxResult *result,
2948 enum device_endian endian)
2950 uint8_t *ptr;
2951 uint64_t val;
2952 MemoryRegion *mr;
2953 hwaddr l = 8;
2954 hwaddr addr1;
2955 MemTxResult r;
2956 bool release_lock = false;
2958 rcu_read_lock();
2959 mr = address_space_translate(as, addr, &addr1, &l,
2960 false);
2961 if (l < 8 || !memory_access_is_direct(mr, false)) {
2962 release_lock |= prepare_mmio_access(mr);
2964 /* I/O case */
2965 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2966 #if defined(TARGET_WORDS_BIGENDIAN)
2967 if (endian == DEVICE_LITTLE_ENDIAN) {
2968 val = bswap64(val);
2970 #else
2971 if (endian == DEVICE_BIG_ENDIAN) {
2972 val = bswap64(val);
2974 #endif
2975 } else {
2976 /* RAM case */
2977 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2978 & TARGET_PAGE_MASK)
2979 + addr1);
2980 switch (endian) {
2981 case DEVICE_LITTLE_ENDIAN:
2982 val = ldq_le_p(ptr);
2983 break;
2984 case DEVICE_BIG_ENDIAN:
2985 val = ldq_be_p(ptr);
2986 break;
2987 default:
2988 val = ldq_p(ptr);
2989 break;
2991 r = MEMTX_OK;
2993 if (result) {
2994 *result = r;
2996 if (release_lock) {
2997 qemu_mutex_unlock_iothread();
2999 rcu_read_unlock();
3000 return val;
3003 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3004 MemTxAttrs attrs, MemTxResult *result)
3006 return address_space_ldq_internal(as, addr, attrs, result,
3007 DEVICE_NATIVE_ENDIAN);
3010 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3011 MemTxAttrs attrs, MemTxResult *result)
3013 return address_space_ldq_internal(as, addr, attrs, result,
3014 DEVICE_LITTLE_ENDIAN);
3017 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3018 MemTxAttrs attrs, MemTxResult *result)
3020 return address_space_ldq_internal(as, addr, attrs, result,
3021 DEVICE_BIG_ENDIAN);
3024 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3026 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3029 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3031 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3034 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3036 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3039 /* XXX: optimize */
3040 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3041 MemTxAttrs attrs, MemTxResult *result)
3043 uint8_t val;
3044 MemTxResult r;
3046 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3047 if (result) {
3048 *result = r;
3050 return val;
3053 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3055 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3058 /* warning: addr must be aligned */
3059 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3060 hwaddr addr,
3061 MemTxAttrs attrs,
3062 MemTxResult *result,
3063 enum device_endian endian)
3065 uint8_t *ptr;
3066 uint64_t val;
3067 MemoryRegion *mr;
3068 hwaddr l = 2;
3069 hwaddr addr1;
3070 MemTxResult r;
3071 bool release_lock = false;
3073 rcu_read_lock();
3074 mr = address_space_translate(as, addr, &addr1, &l,
3075 false);
3076 if (l < 2 || !memory_access_is_direct(mr, false)) {
3077 release_lock |= prepare_mmio_access(mr);
3079 /* I/O case */
3080 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3081 #if defined(TARGET_WORDS_BIGENDIAN)
3082 if (endian == DEVICE_LITTLE_ENDIAN) {
3083 val = bswap16(val);
3085 #else
3086 if (endian == DEVICE_BIG_ENDIAN) {
3087 val = bswap16(val);
3089 #endif
3090 } else {
3091 /* RAM case */
3092 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3093 & TARGET_PAGE_MASK)
3094 + addr1);
3095 switch (endian) {
3096 case DEVICE_LITTLE_ENDIAN:
3097 val = lduw_le_p(ptr);
3098 break;
3099 case DEVICE_BIG_ENDIAN:
3100 val = lduw_be_p(ptr);
3101 break;
3102 default:
3103 val = lduw_p(ptr);
3104 break;
3106 r = MEMTX_OK;
3108 if (result) {
3109 *result = r;
3111 if (release_lock) {
3112 qemu_mutex_unlock_iothread();
3114 rcu_read_unlock();
3115 return val;
3118 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3119 MemTxAttrs attrs, MemTxResult *result)
3121 return address_space_lduw_internal(as, addr, attrs, result,
3122 DEVICE_NATIVE_ENDIAN);
3125 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3126 MemTxAttrs attrs, MemTxResult *result)
3128 return address_space_lduw_internal(as, addr, attrs, result,
3129 DEVICE_LITTLE_ENDIAN);
3132 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3133 MemTxAttrs attrs, MemTxResult *result)
3135 return address_space_lduw_internal(as, addr, attrs, result,
3136 DEVICE_BIG_ENDIAN);
3139 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3141 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3144 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3146 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3149 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3151 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3154 /* warning: addr must be aligned. The ram page is not masked as dirty
3155 and the code inside is not invalidated. It is useful if the dirty
3156 bits are used to track modified PTEs */
3157 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3158 MemTxAttrs attrs, MemTxResult *result)
3160 uint8_t *ptr;
3161 MemoryRegion *mr;
3162 hwaddr l = 4;
3163 hwaddr addr1;
3164 MemTxResult r;
3165 uint8_t dirty_log_mask;
3166 bool release_lock = false;
3168 rcu_read_lock();
3169 mr = address_space_translate(as, addr, &addr1, &l,
3170 true);
3171 if (l < 4 || !memory_access_is_direct(mr, true)) {
3172 release_lock |= prepare_mmio_access(mr);
3174 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3175 } else {
3176 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3177 ptr = qemu_get_ram_ptr(addr1);
3178 stl_p(ptr, val);
3180 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3181 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3182 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3183 r = MEMTX_OK;
3185 if (result) {
3186 *result = r;
3188 if (release_lock) {
3189 qemu_mutex_unlock_iothread();
3191 rcu_read_unlock();
3194 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3196 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3199 /* warning: addr must be aligned */
3200 static inline void address_space_stl_internal(AddressSpace *as,
3201 hwaddr addr, uint32_t val,
3202 MemTxAttrs attrs,
3203 MemTxResult *result,
3204 enum device_endian endian)
3206 uint8_t *ptr;
3207 MemoryRegion *mr;
3208 hwaddr l = 4;
3209 hwaddr addr1;
3210 MemTxResult r;
3211 bool release_lock = false;
3213 rcu_read_lock();
3214 mr = address_space_translate(as, addr, &addr1, &l,
3215 true);
3216 if (l < 4 || !memory_access_is_direct(mr, true)) {
3217 release_lock |= prepare_mmio_access(mr);
3219 #if defined(TARGET_WORDS_BIGENDIAN)
3220 if (endian == DEVICE_LITTLE_ENDIAN) {
3221 val = bswap32(val);
3223 #else
3224 if (endian == DEVICE_BIG_ENDIAN) {
3225 val = bswap32(val);
3227 #endif
3228 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3229 } else {
3230 /* RAM case */
3231 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3232 ptr = qemu_get_ram_ptr(addr1);
3233 switch (endian) {
3234 case DEVICE_LITTLE_ENDIAN:
3235 stl_le_p(ptr, val);
3236 break;
3237 case DEVICE_BIG_ENDIAN:
3238 stl_be_p(ptr, val);
3239 break;
3240 default:
3241 stl_p(ptr, val);
3242 break;
3244 invalidate_and_set_dirty(mr, addr1, 4);
3245 r = MEMTX_OK;
3247 if (result) {
3248 *result = r;
3250 if (release_lock) {
3251 qemu_mutex_unlock_iothread();
3253 rcu_read_unlock();
3256 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3257 MemTxAttrs attrs, MemTxResult *result)
3259 address_space_stl_internal(as, addr, val, attrs, result,
3260 DEVICE_NATIVE_ENDIAN);
3263 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3264 MemTxAttrs attrs, MemTxResult *result)
3266 address_space_stl_internal(as, addr, val, attrs, result,
3267 DEVICE_LITTLE_ENDIAN);
3270 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3271 MemTxAttrs attrs, MemTxResult *result)
3273 address_space_stl_internal(as, addr, val, attrs, result,
3274 DEVICE_BIG_ENDIAN);
3277 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3279 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3282 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3284 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3287 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3289 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3292 /* XXX: optimize */
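/* (The byte and 64-bit store helpers here, like address_space_ldub above,
 * are simply layered on address_space_rw with a pre-swapped value, which
 * is what the "XXX: optimize" notes refer to.) */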
3293 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3294 MemTxAttrs attrs, MemTxResult *result)
3296 uint8_t v = val;
3297 MemTxResult r;
3299 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3300 if (result) {
3301 *result = r;
3305 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3307 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3310 /* warning: addr must be aligned */
3311 static inline void address_space_stw_internal(AddressSpace *as,
3312 hwaddr addr, uint32_t val,
3313 MemTxAttrs attrs,
3314 MemTxResult *result,
3315 enum device_endian endian)
3317 uint8_t *ptr;
3318 MemoryRegion *mr;
3319 hwaddr l = 2;
3320 hwaddr addr1;
3321 MemTxResult r;
3322 bool release_lock = false;
3324 rcu_read_lock();
3325 mr = address_space_translate(as, addr, &addr1, &l, true);
3326 if (l < 2 || !memory_access_is_direct(mr, true)) {
3327 release_lock |= prepare_mmio_access(mr);
3329 #if defined(TARGET_WORDS_BIGENDIAN)
3330 if (endian == DEVICE_LITTLE_ENDIAN) {
3331 val = bswap16(val);
3333 #else
3334 if (endian == DEVICE_BIG_ENDIAN) {
3335 val = bswap16(val);
3337 #endif
3338 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3339 } else {
3340 /* RAM case */
3341 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3342 ptr = qemu_get_ram_ptr(addr1);
3343 switch (endian) {
3344 case DEVICE_LITTLE_ENDIAN:
3345 stw_le_p(ptr, val);
3346 break;
3347 case DEVICE_BIG_ENDIAN:
3348 stw_be_p(ptr, val);
3349 break;
3350 default:
3351 stw_p(ptr, val);
3352 break;
3354 invalidate_and_set_dirty(mr, addr1, 2);
3355 r = MEMTX_OK;
3357 if (result) {
3358 *result = r;
3360 if (release_lock) {
3361 qemu_mutex_unlock_iothread();
3363 rcu_read_unlock();
3366 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3367 MemTxAttrs attrs, MemTxResult *result)
3369 address_space_stw_internal(as, addr, val, attrs, result,
3370 DEVICE_NATIVE_ENDIAN);
3373 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3374 MemTxAttrs attrs, MemTxResult *result)
3376 address_space_stw_internal(as, addr, val, attrs, result,
3377 DEVICE_LITTLE_ENDIAN);
3380 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3381 MemTxAttrs attrs, MemTxResult *result)
3383 address_space_stw_internal(as, addr, val, attrs, result,
3384 DEVICE_BIG_ENDIAN);
3387 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3389 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3392 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3394 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3397 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3399 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3402 /* XXX: optimize */
3403 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3404 MemTxAttrs attrs, MemTxResult *result)
3406 MemTxResult r;
3407 val = tswap64(val);
3408 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3409 if (result) {
3410 *result = r;
3414 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3415 MemTxAttrs attrs, MemTxResult *result)
3417 MemTxResult r;
3418 val = cpu_to_le64(val);
3419 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3420 if (result) {
3421 *result = r;
3424 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3425 MemTxAttrs attrs, MemTxResult *result)
3427 MemTxResult r;
3428 val = cpu_to_be64(val);
3429 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3430 if (result) {
3431 *result = r;
3435 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3437 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3440 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3442 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3445 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3447 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3450 /* virtual memory access for debug (includes writing to ROM) */
3451 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3452 uint8_t *buf, int len, int is_write)
3454 int l;
3455 hwaddr phys_addr;
3456 target_ulong page;
3458 while (len > 0) {
3459 page = addr & TARGET_PAGE_MASK;
3460 phys_addr = cpu_get_phys_page_debug(cpu, page);
3461 /* if no physical page mapped, return an error */
3462 if (phys_addr == -1)
3463 return -1;
3464 l = (page + TARGET_PAGE_SIZE) - addr;
3465 if (l > len)
3466 l = len;
3467 phys_addr += (addr & ~TARGET_PAGE_MASK);
3468 if (is_write) {
3469 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3470 } else {
3471 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3472 buf, l, 0);
3473 }
3474 len -= l;
3475 buf += l;
3476 addr += l;
3477 }
3478 return 0;
3479 }
3480 #endif
3482 /*
3483 * A helper function for the _utterly broken_ virtio device model to find out if
3484 * it's running on a big endian machine. Don't do this at home kids!
3485 */
3486 bool target_words_bigendian(void);
3487 bool target_words_bigendian(void)
3488 {
3489 #if defined(TARGET_WORDS_BIGENDIAN)
3490 return true;
3491 #else
3492 return false;
3493 #endif
3494 }
3496 #ifndef CONFIG_USER_ONLY
3497 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3498 {
3499 MemoryRegion *mr;
3500 hwaddr l = 1;
3501 bool res;
3503 rcu_read_lock();
3504 mr = address_space_translate(&address_space_memory,
3505 phys_addr, &phys_addr, &l, false);
3507 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3508 rcu_read_unlock();
3509 return res;
3510 }
3512 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3513 {
3514 RAMBlock *block;
3515 int ret = 0;
3517 rcu_read_lock();
3518 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3519 ret = func(block->idstr, block->host, block->offset,
3520 block->used_length, opaque);
3521 if (ret) {
3522 break;
3523 }
3524 }
3525 rcu_read_unlock();
3526 return ret;
3527 }
3528 #endif