 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <sys/types.h>
#include "qemu-common.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"

#include "qemu/range.h"

//#define DEBUG_SUBPAGE
#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;
/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)
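/* These flags live in the RAMBlock flags field.  They are OR-ed together when
 * a block is created (see qemu_ram_alloc_internal() and
 * qemu_ram_alloc_from_file() below) and tested individually later, e.g.
 * (block->flags & RAM_SHARED) selects MAP_SHARED when a block is remapped in
 * qemu_ram_remap().
 */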
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];
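/* Worked example (illustrative; assumes TARGET_PAGE_BITS == 12 and
 * P_L2_BITS == 9): P_L2_SIZE is then 512 entries per node and
 * P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6, i.e. the phys_map below is a
 * radix tree of up to six levels, each indexed by a 9-bit slice of the
 * physical page frame number.
 */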
typedef struct PhysPageMap {
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;
struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3
static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    ret = map->nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
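/* A freshly allocated node is filled with identical entries: leaves start out
 * pointing at PHYS_SECTION_UNASSIGNED, interior nodes at PHYS_MAP_NODE_NIL
 * with skip = 1, so any address never touched by phys_page_set() below still
 * resolves to the unassigned section in phys_page_find().
 */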
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
/* Compact a non leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }
        phys_page_compact(&p[i], nodes, compacted);
    }

    /* We can only compress if there's only one child. */
    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    hwaddr index = addr >> TARGET_PAGE_BITS;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    }
    return &sections[PHYS_SECTION_UNASSIGNED];
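/* The walk above visits at most P_L2_LEVELS entries: each step consumes
 * lp.skip levels (more than one when phys_page_compact() has folded a chain
 * of single-child nodes), and the final range check guards against a leaf
 * whose section does not actually cover addr, which then falls back to
 * PHYS_SECTION_UNASSIGNED.
 */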
bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
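/* (The ROM-device branch is elided in this excerpt; the intent, as relied on
 * by the callers below, is that an access is "direct" when it can be serviced
 * from host memory: RAM unless it is a read-only region being written, and
 * ROM devices only for reads.)
 */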
/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    MemoryRegionSection *section;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }
/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(cpu->memory_dispatch,
                                               addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
#if !defined(CONFIG_USER_ONLY)
static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}
static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription *[]) {
        &vmstate_cpu_common_exception_index,
    }
};

CPUState *qemu_get_cpu(int index)
{
        if (cpu->cpu_index == index) {
#if !defined(CONFIG_USER_ONLY)
void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
{
    /* We only support one address space per cpu at the moment. */
    assert(cpu->as == as);

    if (cpu->tcg_as_listener) {
        memory_listener_unregister(cpu->tcg_as_listener);
    } else {
        cpu->tcg_as_listener = g_new0(MemoryListener, 1);
    }
    cpu->tcg_as_listener->commit = tcg_commit;
    memory_listener_register(cpu->tcg_as_listener, as);
void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);

#if defined(CONFIG_USER_ONLY)
    CPU_FOREACH(some_cpu) {
    cpu->cpu_index = cpu_index;
    QTAILQ_INIT(&cpu->breakpoints);
    QTAILQ_INIT(&cpu->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
    cpu_reload_memory_map(cpu);
#endif
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
        tb_invalidate_phys_addr(cpu->as,
                                phys | (pc & ~TARGET_PAGE_MASK));
#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)

#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
    }

    wp = g_malloc(sizeof(*wp));

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);
/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
            && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}
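/* Illustrative example (assuming a 64-bit vaddr): a watchpoint at
 * vaddr = 0xfffffffffffff000 with len = 0x1000 would make vaddr + len wrap to
 * zero, but wpend = vaddr + len - 1 = 0xffffffffffffffff stays representable,
 * so the overlap test above remains correct for ranges that end at the very
 * top of the address space.
 */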
/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    bp = g_malloc(sizeof(*bp));

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            CPUArchState *env = cpu->env_ptr;
void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    }
#if defined(CONFIG_USER_ONLY)
    struct sigaction act;
    sigfillset(&act.sa_mask);
    act.sa_handler = SIG_DFL;
    sigaction(SIGABRT, &act, NULL);
#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        goto found;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     *     xxx removed from list
     *     call_rcu(reclaim_ramblock, xxx);
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    cpu_tlb_reset_dirty_all(start1, length);
/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    unsigned long end, page;

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;
    dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
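/* The bool result (whether any dirty bits were actually cleared) lets callers
 * skip further work when the range was already clean; only TCG needs the TLB
 * reset above, hence the tcg_enabled() check.
 */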
/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    if (memory_region_is_ram(section->mr)) {
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        iotlb = section - section->address_space->dispatch->map.sections;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
            }
        }
    }
#endif /* defined(CONFIG_USER_ONLY) */
934 #if !defined(CONFIG_USER_ONLY)
936 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
938 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
);
940 static void *(*phys_mem_alloc
)(size_t size
, uint64_t *align
) =
944 * Set a custom physical guest memory alloator.
945 * Accelerators with unusual needs may need this. Hopefully, we can
946 * get rid of it eventually.
948 void phys_mem_set_alloc(void *(*alloc
)(size_t, uint64_t *align
))
950 phys_mem_alloc
= alloc
;
953 static uint16_t phys_section_add(PhysPageMap
*map
,
954 MemoryRegionSection
*section
)
956 /* The physical section number is ORed with a page-aligned
957 * pointer to produce the iotlb entries. Thus it should
958 * never overflow into the page-aligned value.
960 assert(map
->sections_nb
< TARGET_PAGE_SIZE
);
962 if (map
->sections_nb
== map
->sections_nb_alloc
) {
963 map
->sections_nb_alloc
= MAX(map
->sections_nb_alloc
* 2, 16);
964 map
->sections
= g_renew(MemoryRegionSection
, map
->sections
,
965 map
->sections_nb_alloc
);
967 map
->sections
[map
->sections_nb
] = *section
;
968 memory_region_ref(section
->mr
);
969 return map
->sections_nb
++;
972 static void phys_section_destroy(MemoryRegion
*mr
)
974 memory_region_unref(mr
);
977 subpage_t
*subpage
= container_of(mr
, subpage_t
, iomem
);
978 object_unref(OBJECT(&subpage
->iomem
));
983 static void phys_sections_free(PhysPageMap
*map
)
985 while (map
->sections_nb
> 0) {
986 MemoryRegionSection
*section
= &map
->sections
[--map
->sections_nb
];
987 phys_section_destroy(section
->mr
);
989 g_free(map
->sections
);
993 static void register_subpage(AddressSpaceDispatch
*d
, MemoryRegionSection
*section
)
996 hwaddr base
= section
->offset_within_address_space
998 MemoryRegionSection
*existing
= phys_page_find(d
->phys_map
, base
,
999 d
->map
.nodes
, d
->map
.sections
);
1000 MemoryRegionSection subsection
= {
1001 .offset_within_address_space
= base
,
1002 .size
= int128_make64(TARGET_PAGE_SIZE
),
1006 assert(existing
->mr
->subpage
|| existing
->mr
== &io_mem_unassigned
);
1008 if (!(existing
->mr
->subpage
)) {
1009 subpage
= subpage_init(d
->as
, base
);
1010 subsection
.address_space
= d
->as
;
1011 subsection
.mr
= &subpage
->iomem
;
1012 phys_page_set(d
, base
>> TARGET_PAGE_BITS
, 1,
1013 phys_section_add(&d
->map
, &subsection
));
1015 subpage
= container_of(existing
->mr
, subpage_t
, iomem
);
1017 start
= section
->offset_within_address_space
& ~TARGET_PAGE_MASK
;
1018 end
= start
+ int128_get64(section
->size
) - 1;
1019 subpage_register(subpage
, start
, end
,
1020 phys_section_add(&d
->map
, section
));
1024 static void register_multipage(AddressSpaceDispatch
*d
,
1025 MemoryRegionSection
*section
)
1027 hwaddr start_addr
= section
->offset_within_address_space
;
1028 uint16_t section_index
= phys_section_add(&d
->map
, section
);
1029 uint64_t num_pages
= int128_get64(int128_rshift(section
->size
,
1033 phys_page_set(d
, start_addr
>> TARGET_PAGE_BITS
, num_pages
, section_index
);
1036 static void mem_add(MemoryListener
*listener
, MemoryRegionSection
*section
)
1038 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
1039 AddressSpaceDispatch
*d
= as
->next_dispatch
;
1040 MemoryRegionSection now
= *section
, remain
= *section
;
1041 Int128 page_size
= int128_make64(TARGET_PAGE_SIZE
);
1043 if (now
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1044 uint64_t left
= TARGET_PAGE_ALIGN(now
.offset_within_address_space
)
1045 - now
.offset_within_address_space
;
1047 now
.size
= int128_min(int128_make64(left
), now
.size
);
1048 register_subpage(d
, &now
);
1050 now
.size
= int128_zero();
1052 while (int128_ne(remain
.size
, now
.size
)) {
1053 remain
.size
= int128_sub(remain
.size
, now
.size
);
1054 remain
.offset_within_address_space
+= int128_get64(now
.size
);
1055 remain
.offset_within_region
+= int128_get64(now
.size
);
1057 if (int128_lt(remain
.size
, page_size
)) {
1058 register_subpage(d
, &now
);
1059 } else if (remain
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1060 now
.size
= page_size
;
1061 register_subpage(d
, &now
);
1063 now
.size
= int128_and(now
.size
, int128_neg(page_size
));
1064 register_multipage(d
, &now
);
1069 void qemu_flush_coalesced_mmio_buffer(void)
1072 kvm_flush_coalesced_mmio_buffer();
1075 void qemu_mutex_lock_ramlist(void)
1077 qemu_mutex_lock(&ram_list
.mutex
);
1080 void qemu_mutex_unlock_ramlist(void)
1082 qemu_mutex_unlock(&ram_list
.mutex
);
1087 #include <sys/vfs.h>
1089 #define HUGETLBFS_MAGIC 0x958458f6
1091 static long gethugepagesize(const char *path
, Error
**errp
)
1097 ret
= statfs(path
, &fs
);
1098 } while (ret
!= 0 && errno
== EINTR
);
1101 error_setg_errno(errp
, errno
, "failed to get page size of file %s",
1106 if (fs
.f_type
!= HUGETLBFS_MAGIC
)
1107 fprintf(stderr
, "Warning: path not on HugeTLBFS: %s\n", path
);
1112 static void *file_ram_alloc(RAMBlock
*block
,
1118 char *sanitized_name
;
1123 Error
*local_err
= NULL
;
1125 hpagesize
= gethugepagesize(path
, &local_err
);
1127 error_propagate(errp
, local_err
);
1130 block
->mr
->align
= hpagesize
;
1132 if (memory
< hpagesize
) {
1133 error_setg(errp
, "memory size 0x" RAM_ADDR_FMT
" must be equal to "
1134 "or larger than huge page size 0x%" PRIx64
,
1139 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1141 "host lacks kvm mmu notifiers, -mem-path unsupported");
1145 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1146 sanitized_name
= g_strdup(memory_region_name(block
->mr
));
1147 for (c
= sanitized_name
; *c
!= '\0'; c
++) {
1152 filename
= g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path
,
1154 g_free(sanitized_name
);
1156 fd
= mkstemp(filename
);
1158 error_setg_errno(errp
, errno
,
1159 "unable to create backing store for hugepages");
1166 memory
= (memory
+hpagesize
-1) & ~(hpagesize
-1);
1169 * ftruncate is not supported by hugetlbfs in older
1170 * hosts, so don't bother bailing out on errors.
1171 * If anything goes wrong with it under other filesystems,
1174 if (ftruncate(fd
, memory
)) {
1175 perror("ftruncate");
1178 area
= mmap(0, memory
, PROT_READ
| PROT_WRITE
,
1179 (block
->flags
& RAM_SHARED
? MAP_SHARED
: MAP_PRIVATE
),
1181 if (area
== MAP_FAILED
) {
1182 error_setg_errno(errp
, errno
,
1183 "unable to map backing store for hugepages");
1189 os_mem_prealloc(fd
, area
, memory
);
1197 error_report("%s", error_get_pretty(*errp
));
1204 /* Called with the ramlist lock held. */
1205 static ram_addr_t
find_ram_offset(ram_addr_t size
)
1207 RAMBlock
*block
, *next_block
;
1208 ram_addr_t offset
= RAM_ADDR_MAX
, mingap
= RAM_ADDR_MAX
;
1210 assert(size
!= 0); /* it would hand out same offset multiple times */
1212 if (QLIST_EMPTY_RCU(&ram_list
.blocks
)) {
1216 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1217 ram_addr_t end
, next
= RAM_ADDR_MAX
;
1219 end
= block
->offset
+ block
->max_length
;
1221 QLIST_FOREACH_RCU(next_block
, &ram_list
.blocks
, next
) {
1222 if (next_block
->offset
>= end
) {
1223 next
= MIN(next
, next_block
->offset
);
1226 if (next
- end
>= size
&& next
- end
< mingap
) {
1228 mingap
= next
- end
;
1232 if (offset
== RAM_ADDR_MAX
) {
1233 fprintf(stderr
, "Failed to find gap of requested size: %" PRIu64
"\n",
1241 ram_addr_t
last_ram_offset(void)
1244 ram_addr_t last
= 0;
1247 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1248 last
= MAX(last
, block
->offset
+ block
->max_length
);
1254 static void qemu_ram_setup_dump(void *addr
, ram_addr_t size
)
1258 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1259 if (!machine_dump_guest_core(current_machine
)) {
1260 ret
= qemu_madvise(addr
, size
, QEMU_MADV_DONTDUMP
);
1262 perror("qemu_madvise");
1263 fprintf(stderr
, "madvise doesn't support MADV_DONTDUMP, "
1264 "but dump_guest_core=off specified\n");
1269 /* Called within an RCU critical section, or while the ramlist lock
1272 static RAMBlock
*find_ram_block(ram_addr_t addr
)
1276 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1277 if (block
->offset
== addr
) {
1285 /* Called with iothread lock held. */
1286 void qemu_ram_set_idstr(ram_addr_t addr
, const char *name
, DeviceState
*dev
)
1288 RAMBlock
*new_block
, *block
;
1291 new_block
= find_ram_block(addr
);
1293 assert(!new_block
->idstr
[0]);
1296 char *id
= qdev_get_dev_path(dev
);
1298 snprintf(new_block
->idstr
, sizeof(new_block
->idstr
), "%s/", id
);
1302 pstrcat(new_block
->idstr
, sizeof(new_block
->idstr
), name
);
1304 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1305 if (block
!= new_block
&& !strcmp(block
->idstr
, new_block
->idstr
)) {
1306 fprintf(stderr
, "RAMBlock \"%s\" already registered, abort!\n",
1314 /* Called with iothread lock held. */
1315 void qemu_ram_unset_idstr(ram_addr_t addr
)
1319 /* FIXME: arch_init.c assumes that this is not called throughout
1320 * migration. Ignore the problem since hot-unplug during migration
1321 * does not work anyway.
1325 block
= find_ram_block(addr
);
1327 memset(block
->idstr
, 0, sizeof(block
->idstr
));
1332 static int memory_try_enable_merging(void *addr
, size_t len
)
1334 if (!machine_mem_merge(current_machine
)) {
1335 /* disabled by the user */
1339 return qemu_madvise(addr
, len
, QEMU_MADV_MERGEABLE
);
1342 /* Only legal before guest might have detected the memory size: e.g. on
1343 * incoming migration, or right after reset.
1345 * As memory core doesn't know how is memory accessed, it is up to
1346 * resize callback to update device state and/or add assertions to detect
1347 * misuse, if necessary.
1349 int qemu_ram_resize(ram_addr_t base
, ram_addr_t newsize
, Error
**errp
)
1351 RAMBlock
*block
= find_ram_block(base
);
1355 newsize
= TARGET_PAGE_ALIGN(newsize
);
1357 if (block
->used_length
== newsize
) {
1361 if (!(block
->flags
& RAM_RESIZEABLE
)) {
1362 error_setg_errno(errp
, EINVAL
,
1363 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1364 " in != 0x" RAM_ADDR_FMT
, block
->idstr
,
1365 newsize
, block
->used_length
);
1369 if (block
->max_length
< newsize
) {
1370 error_setg_errno(errp
, EINVAL
,
1371 "Length too large: %s: 0x" RAM_ADDR_FMT
1372 " > 0x" RAM_ADDR_FMT
, block
->idstr
,
1373 newsize
, block
->max_length
);
1377 cpu_physical_memory_clear_dirty_range(block
->offset
, block
->used_length
);
1378 block
->used_length
= newsize
;
1379 cpu_physical_memory_set_dirty_range(block
->offset
, block
->used_length
,
1381 memory_region_set_size(block
->mr
, newsize
);
1382 if (block
->resized
) {
1383 block
->resized(block
->idstr
, newsize
, block
->host
);
1388 static ram_addr_t
ram_block_add(RAMBlock
*new_block
, Error
**errp
)
1391 RAMBlock
*last_block
= NULL
;
1392 ram_addr_t old_ram_size
, new_ram_size
;
1394 old_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1396 qemu_mutex_lock_ramlist();
1397 new_block
->offset
= find_ram_offset(new_block
->max_length
);
1399 if (!new_block
->host
) {
1400 if (xen_enabled()) {
1401 xen_ram_alloc(new_block
->offset
, new_block
->max_length
,
1404 new_block
->host
= phys_mem_alloc(new_block
->max_length
,
1405 &new_block
->mr
->align
);
1406 if (!new_block
->host
) {
1407 error_setg_errno(errp
, errno
,
1408 "cannot set up guest memory '%s'",
1409 memory_region_name(new_block
->mr
));
1410 qemu_mutex_unlock_ramlist();
1413 memory_try_enable_merging(new_block
->host
, new_block
->max_length
);
1417 new_ram_size
= MAX(old_ram_size
,
1418 (new_block
->offset
+ new_block
->max_length
) >> TARGET_PAGE_BITS
);
1419 if (new_ram_size
> old_ram_size
) {
1420 migration_bitmap_extend(old_ram_size
, new_ram_size
);
1422 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1423 * QLIST (which has an RCU-friendly variant) does not have insertion at
1424 * tail, so save the last element in last_block.
1426 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1428 if (block
->max_length
< new_block
->max_length
) {
1433 QLIST_INSERT_BEFORE_RCU(block
, new_block
, next
);
1434 } else if (last_block
) {
1435 QLIST_INSERT_AFTER_RCU(last_block
, new_block
, next
);
1436 } else { /* list is empty */
1437 QLIST_INSERT_HEAD_RCU(&ram_list
.blocks
, new_block
, next
);
1439 ram_list
.mru_block
= NULL
;
1441 /* Write list before version */
1444 qemu_mutex_unlock_ramlist();
1446 new_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1448 if (new_ram_size
> old_ram_size
) {
1451 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1452 for (i
= 0; i
< DIRTY_MEMORY_NUM
; i
++) {
1453 ram_list
.dirty_memory
[i
] =
1454 bitmap_zero_extend(ram_list
.dirty_memory
[i
],
1455 old_ram_size
, new_ram_size
);
1458 cpu_physical_memory_set_dirty_range(new_block
->offset
,
1459 new_block
->used_length
,
1462 if (new_block
->host
) {
1463 qemu_ram_setup_dump(new_block
->host
, new_block
->max_length
);
1464 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_HUGEPAGE
);
1465 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_DONTFORK
);
1466 if (kvm_enabled()) {
1467 kvm_setup_guest_memory(new_block
->host
, new_block
->max_length
);
1471 return new_block
->offset
;
1475 ram_addr_t
qemu_ram_alloc_from_file(ram_addr_t size
, MemoryRegion
*mr
,
1476 bool share
, const char *mem_path
,
1479 RAMBlock
*new_block
;
1481 Error
*local_err
= NULL
;
1483 if (xen_enabled()) {
1484 error_setg(errp
, "-mem-path not supported with Xen");
1488 if (phys_mem_alloc
!= qemu_anon_ram_alloc
) {
1490 * file_ram_alloc() needs to allocate just like
1491 * phys_mem_alloc, but we haven't bothered to provide
1495 "-mem-path not supported with this accelerator");
1499 size
= TARGET_PAGE_ALIGN(size
);
1500 new_block
= g_malloc0(sizeof(*new_block
));
1502 new_block
->used_length
= size
;
1503 new_block
->max_length
= size
;
1504 new_block
->flags
= share
? RAM_SHARED
: 0;
1505 new_block
->host
= file_ram_alloc(new_block
, size
,
1507 if (!new_block
->host
) {
1512 addr
= ram_block_add(new_block
, &local_err
);
1515 error_propagate(errp
, local_err
);
1523 ram_addr_t
qemu_ram_alloc_internal(ram_addr_t size
, ram_addr_t max_size
,
1524 void (*resized
)(const char*,
1527 void *host
, bool resizeable
,
1528 MemoryRegion
*mr
, Error
**errp
)
1530 RAMBlock
*new_block
;
1532 Error
*local_err
= NULL
;
1534 size
= TARGET_PAGE_ALIGN(size
);
1535 max_size
= TARGET_PAGE_ALIGN(max_size
);
1536 new_block
= g_malloc0(sizeof(*new_block
));
1538 new_block
->resized
= resized
;
1539 new_block
->used_length
= size
;
1540 new_block
->max_length
= max_size
;
1541 assert(max_size
>= size
);
1543 new_block
->host
= host
;
1545 new_block
->flags
|= RAM_PREALLOC
;
1548 new_block
->flags
|= RAM_RESIZEABLE
;
1550 addr
= ram_block_add(new_block
, &local_err
);
1553 error_propagate(errp
, local_err
);
1559 ram_addr_t
qemu_ram_alloc_from_ptr(ram_addr_t size
, void *host
,
1560 MemoryRegion
*mr
, Error
**errp
)
1562 return qemu_ram_alloc_internal(size
, size
, NULL
, host
, false, mr
, errp
);
1565 ram_addr_t
qemu_ram_alloc(ram_addr_t size
, MemoryRegion
*mr
, Error
**errp
)
1567 return qemu_ram_alloc_internal(size
, size
, NULL
, NULL
, false, mr
, errp
);
1570 ram_addr_t
qemu_ram_alloc_resizeable(ram_addr_t size
, ram_addr_t maxsz
,
1571 void (*resized
)(const char*,
1574 MemoryRegion
*mr
, Error
**errp
)
1576 return qemu_ram_alloc_internal(size
, maxsz
, resized
, NULL
, true, mr
, errp
);
1579 void qemu_ram_free_from_ptr(ram_addr_t addr
)
1583 qemu_mutex_lock_ramlist();
1584 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1585 if (addr
== block
->offset
) {
1586 QLIST_REMOVE_RCU(block
, next
);
1587 ram_list
.mru_block
= NULL
;
1588 /* Write list before version */
1591 g_free_rcu(block
, rcu
);
1595 qemu_mutex_unlock_ramlist();
1598 static void reclaim_ramblock(RAMBlock
*block
)
1600 if (block
->flags
& RAM_PREALLOC
) {
1602 } else if (xen_enabled()) {
1603 xen_invalidate_map_cache_entry(block
->host
);
1605 } else if (block
->fd
>= 0) {
1606 munmap(block
->host
, block
->max_length
);
1610 qemu_anon_ram_free(block
->host
, block
->max_length
);
1615 void qemu_ram_free(ram_addr_t addr
)
1619 qemu_mutex_lock_ramlist();
1620 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1621 if (addr
== block
->offset
) {
1622 QLIST_REMOVE_RCU(block
, next
);
1623 ram_list
.mru_block
= NULL
;
1624 /* Write list before version */
1627 call_rcu(block
, reclaim_ramblock
, rcu
);
1631 qemu_mutex_unlock_ramlist();
1635 void qemu_ram_remap(ram_addr_t addr
, ram_addr_t length
)
1642 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1643 offset
= addr
- block
->offset
;
1644 if (offset
< block
->max_length
) {
1645 vaddr
= ramblock_ptr(block
, offset
);
1646 if (block
->flags
& RAM_PREALLOC
) {
1648 } else if (xen_enabled()) {
1652 if (block
->fd
>= 0) {
1653 flags
|= (block
->flags
& RAM_SHARED
?
1654 MAP_SHARED
: MAP_PRIVATE
);
1655 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1656 flags
, block
->fd
, offset
);
1659 * Remap needs to match alloc. Accelerators that
1660 * set phys_mem_alloc never remap. If they did,
1661 * we'd need a remap hook here.
1663 assert(phys_mem_alloc
== qemu_anon_ram_alloc
);
1665 flags
|= MAP_PRIVATE
| MAP_ANONYMOUS
;
1666 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1669 if (area
!= vaddr
) {
1670 fprintf(stderr
, "Could not remap addr: "
1671 RAM_ADDR_FMT
"@" RAM_ADDR_FMT
"\n",
1675 memory_try_enable_merging(vaddr
, length
);
1676 qemu_ram_setup_dump(vaddr
, length
);
1681 #endif /* !_WIN32 */
1683 int qemu_get_ram_fd(ram_addr_t addr
)
1689 block
= qemu_get_ram_block(addr
);
1695 void *qemu_get_ram_block_host_ptr(ram_addr_t addr
)
1701 block
= qemu_get_ram_block(addr
);
1702 ptr
= ramblock_ptr(block
, 0);
1707 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1708 * This should not be used for general purpose DMA. Use address_space_map
1709 * or address_space_rw instead. For local memory (e.g. video ram) that the
1710 * device owns, use memory_region_get_ram_ptr.
1712 * By the time this function returns, the returned pointer is not protected
1713 * by RCU anymore. If the caller is not within an RCU critical section and
1714 * does not hold the iothread lock, it must have other means of protecting the
1715 * pointer, such as a reference to the region that includes the incoming
1718 void *qemu_get_ram_ptr(ram_addr_t addr
)
1724 block
= qemu_get_ram_block(addr
);
1726 if (xen_enabled() && block
->host
== NULL
) {
1727 /* We need to check if the requested address is in the RAM
1728 * because we don't want to map the entire memory in QEMU.
1729 * In that case just map until the end of the page.
1731 if (block
->offset
== 0) {
1732 ptr
= xen_map_cache(addr
, 0, 0);
1736 block
->host
= xen_map_cache(block
->offset
, block
->max_length
, 1);
1738 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1745 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1746 * but takes a size argument.
1748 * By the time this function returns, the returned pointer is not protected
1749 * by RCU anymore. If the caller is not within an RCU critical section and
1750 * does not hold the iothread lock, it must have other means of protecting the
1751 * pointer, such as a reference to the region that includes the incoming
1754 static void *qemu_ram_ptr_length(ram_addr_t addr
, hwaddr
*size
)
1760 if (xen_enabled()) {
1761 return xen_map_cache(addr
, *size
, 1);
1765 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1766 if (addr
- block
->offset
< block
->max_length
) {
1767 if (addr
- block
->offset
+ *size
> block
->max_length
)
1768 *size
= block
->max_length
- addr
+ block
->offset
;
1769 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1775 fprintf(stderr
, "Bad ram offset %" PRIx64
"\n", (uint64_t)addr
);
1780 /* Some of the softmmu routines need to translate from a host pointer
1781 * (typically a TLB entry) back to a ram offset.
1783 * By the time this function returns, the returned pointer is not protected
1784 * by RCU anymore. If the caller is not within an RCU critical section and
1785 * does not hold the iothread lock, it must have other means of protecting the
1786 * pointer, such as a reference to the region that includes the incoming
1789 MemoryRegion
*qemu_ram_addr_from_host(void *ptr
, ram_addr_t
*ram_addr
)
1792 uint8_t *host
= ptr
;
1795 if (xen_enabled()) {
1797 *ram_addr
= xen_ram_addr_from_mapcache(ptr
);
1798 mr
= qemu_get_ram_block(*ram_addr
)->mr
;
1804 block
= atomic_rcu_read(&ram_list
.mru_block
);
1805 if (block
&& block
->host
&& host
- block
->host
< block
->max_length
) {
1809 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1810 /* This case append when the block is not mapped. */
1811 if (block
->host
== NULL
) {
1814 if (host
- block
->host
< block
->max_length
) {
1823 *ram_addr
= block
->offset
+ (host
- block
->host
);
1829 static void notdirty_mem_write(void *opaque
, hwaddr ram_addr
,
1830 uint64_t val
, unsigned size
)
1832 if (!cpu_physical_memory_get_dirty_flag(ram_addr
, DIRTY_MEMORY_CODE
)) {
1833 tb_invalidate_phys_page_fast(ram_addr
, size
);
1837 stb_p(qemu_get_ram_ptr(ram_addr
), val
);
1840 stw_p(qemu_get_ram_ptr(ram_addr
), val
);
1843 stl_p(qemu_get_ram_ptr(ram_addr
), val
);
1848 /* Set both VGA and migration bits for simplicity and to remove
1849 * the notdirty callback faster.
1851 cpu_physical_memory_set_dirty_range(ram_addr
, size
,
1852 DIRTY_CLIENTS_NOCODE
);
1853 /* we remove the notdirty callback only if the code has been
1855 if (!cpu_physical_memory_is_clean(ram_addr
)) {
1856 CPUArchState
*env
= current_cpu
->env_ptr
;
1857 tlb_set_dirty(env
, current_cpu
->mem_io_vaddr
);
1861 static bool notdirty_mem_accepts(void *opaque
, hwaddr addr
,
1862 unsigned size
, bool is_write
)
1867 static const MemoryRegionOps notdirty_mem_ops
= {
1868 .write
= notdirty_mem_write
,
1869 .valid
.accepts
= notdirty_mem_accepts
,
1870 .endianness
= DEVICE_NATIVE_ENDIAN
,
1873 /* Generate a debug exception if a watchpoint has been hit. */
1874 static void check_watchpoint(int offset
, int len
, MemTxAttrs attrs
, int flags
)
1876 CPUState
*cpu
= current_cpu
;
1877 CPUArchState
*env
= cpu
->env_ptr
;
1878 target_ulong pc
, cs_base
;
1883 if (cpu
->watchpoint_hit
) {
1884 /* We re-entered the check after replacing the TB. Now raise
1885 * the debug interrupt so that is will trigger after the
1886 * current instruction. */
1887 cpu_interrupt(cpu
, CPU_INTERRUPT_DEBUG
);
1890 vaddr
= (cpu
->mem_io_vaddr
& TARGET_PAGE_MASK
) + offset
;
1891 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
1892 if (cpu_watchpoint_address_matches(wp
, vaddr
, len
)
1893 && (wp
->flags
& flags
)) {
1894 if (flags
== BP_MEM_READ
) {
1895 wp
->flags
|= BP_WATCHPOINT_HIT_READ
;
1897 wp
->flags
|= BP_WATCHPOINT_HIT_WRITE
;
1899 wp
->hitaddr
= vaddr
;
1900 wp
->hitattrs
= attrs
;
1901 if (!cpu
->watchpoint_hit
) {
1902 cpu
->watchpoint_hit
= wp
;
1903 tb_check_watchpoint(cpu
);
1904 if (wp
->flags
& BP_STOP_BEFORE_ACCESS
) {
1905 cpu
->exception_index
= EXCP_DEBUG
;
1908 cpu_get_tb_cpu_state(env
, &pc
, &cs_base
, &cpu_flags
);
1909 tb_gen_code(cpu
, pc
, cs_base
, cpu_flags
, 1);
1910 cpu_resume_from_signal(cpu
, NULL
);
1914 wp
->flags
&= ~BP_WATCHPOINT_HIT
;
1919 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1920 so these check for a hit then pass through to the normal out-of-line
1922 static MemTxResult
watch_mem_read(void *opaque
, hwaddr addr
, uint64_t *pdata
,
1923 unsigned size
, MemTxAttrs attrs
)
1928 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, attrs
, BP_MEM_READ
);
1931 data
= address_space_ldub(&address_space_memory
, addr
, attrs
, &res
);
1934 data
= address_space_lduw(&address_space_memory
, addr
, attrs
, &res
);
1937 data
= address_space_ldl(&address_space_memory
, addr
, attrs
, &res
);
1945 static MemTxResult
watch_mem_write(void *opaque
, hwaddr addr
,
1946 uint64_t val
, unsigned size
,
1951 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, attrs
, BP_MEM_WRITE
);
1954 address_space_stb(&address_space_memory
, addr
, val
, attrs
, &res
);
1957 address_space_stw(&address_space_memory
, addr
, val
, attrs
, &res
);
1960 address_space_stl(&address_space_memory
, addr
, val
, attrs
, &res
);
1967 static const MemoryRegionOps watch_mem_ops
= {
1968 .read_with_attrs
= watch_mem_read
,
1969 .write_with_attrs
= watch_mem_write
,
1970 .endianness
= DEVICE_NATIVE_ENDIAN
,
1973 static MemTxResult
subpage_read(void *opaque
, hwaddr addr
, uint64_t *data
,
1974 unsigned len
, MemTxAttrs attrs
)
1976 subpage_t
*subpage
= opaque
;
1980 #if defined(DEBUG_SUBPAGE)
1981 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
"\n", __func__
,
1982 subpage
, len
, addr
);
1984 res
= address_space_read(subpage
->as
, addr
+ subpage
->base
,
1991 *data
= ldub_p(buf
);
1994 *data
= lduw_p(buf
);
2007 static MemTxResult
subpage_write(void *opaque
, hwaddr addr
,
2008 uint64_t value
, unsigned len
, MemTxAttrs attrs
)
2010 subpage_t
*subpage
= opaque
;
2013 #if defined(DEBUG_SUBPAGE)
2014 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2015 " value %"PRIx64
"\n",
2016 __func__
, subpage
, len
, addr
, value
);
2034 return address_space_write(subpage
->as
, addr
+ subpage
->base
,
2038 static bool subpage_accepts(void *opaque
, hwaddr addr
,
2039 unsigned len
, bool is_write
)
2041 subpage_t
*subpage
= opaque
;
2042 #if defined(DEBUG_SUBPAGE)
2043 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx
"\n",
2044 __func__
, subpage
, is_write
? 'w' : 'r', len
, addr
);
2047 return address_space_access_valid(subpage
->as
, addr
+ subpage
->base
,
2051 static const MemoryRegionOps subpage_ops
= {
2052 .read_with_attrs
= subpage_read
,
2053 .write_with_attrs
= subpage_write
,
2054 .impl
.min_access_size
= 1,
2055 .impl
.max_access_size
= 8,
2056 .valid
.min_access_size
= 1,
2057 .valid
.max_access_size
= 8,
2058 .valid
.accepts
= subpage_accepts
,
2059 .endianness
= DEVICE_NATIVE_ENDIAN
,
2062 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
2067 if (start
>= TARGET_PAGE_SIZE
|| end
>= TARGET_PAGE_SIZE
)
2069 idx
= SUBPAGE_IDX(start
);
2070 eidx
= SUBPAGE_IDX(end
);
2071 #if defined(DEBUG_SUBPAGE)
2072 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2073 __func__
, mmio
, start
, end
, idx
, eidx
, section
);
2075 for (; idx
<= eidx
; idx
++) {
2076 mmio
->sub_section
[idx
] = section
;
2082 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
)
2086 mmio
= g_malloc0(sizeof(subpage_t
));
2090 memory_region_init_io(&mmio
->iomem
, NULL
, &subpage_ops
, mmio
,
2091 NULL
, TARGET_PAGE_SIZE
);
2092 mmio
->iomem
.subpage
= true;
2093 #if defined(DEBUG_SUBPAGE)
2094 printf("%s: %p base " TARGET_FMT_plx
" len %08x\n", __func__
,
2095 mmio
, base
, TARGET_PAGE_SIZE
);
2097 subpage_register(mmio
, 0, TARGET_PAGE_SIZE
-1, PHYS_SECTION_UNASSIGNED
);
2102 static uint16_t dummy_section(PhysPageMap
*map
, AddressSpace
*as
,
2106 MemoryRegionSection section
= {
2107 .address_space
= as
,
2109 .offset_within_address_space
= 0,
2110 .offset_within_region
= 0,
2111 .size
= int128_2_64(),
2114 return phys_section_add(map
, §ion
);
2117 MemoryRegion
*iotlb_to_region(CPUState
*cpu
, hwaddr index
)
2119 AddressSpaceDispatch
*d
= atomic_rcu_read(&cpu
->memory_dispatch
);
2120 MemoryRegionSection
*sections
= d
->map
.sections
;
2122 return sections
[index
& ~TARGET_PAGE_MASK
].mr
;
2125 static void io_mem_init(void)
2127 memory_region_init_io(&io_mem_rom
, NULL
, &unassigned_mem_ops
, NULL
, NULL
, UINT64_MAX
);
2128 memory_region_init_io(&io_mem_unassigned
, NULL
, &unassigned_mem_ops
, NULL
,
2130 memory_region_init_io(&io_mem_notdirty
, NULL
, ¬dirty_mem_ops
, NULL
,
2132 memory_region_init_io(&io_mem_watch
, NULL
, &watch_mem_ops
, NULL
,
2136 static void mem_begin(MemoryListener
*listener
)
2138 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2139 AddressSpaceDispatch
*d
= g_new0(AddressSpaceDispatch
, 1);
2142 n
= dummy_section(&d
->map
, as
, &io_mem_unassigned
);
2143 assert(n
== PHYS_SECTION_UNASSIGNED
);
2144 n
= dummy_section(&d
->map
, as
, &io_mem_notdirty
);
2145 assert(n
== PHYS_SECTION_NOTDIRTY
);
2146 n
= dummy_section(&d
->map
, as
, &io_mem_rom
);
2147 assert(n
== PHYS_SECTION_ROM
);
2148 n
= dummy_section(&d
->map
, as
, &io_mem_watch
);
2149 assert(n
== PHYS_SECTION_WATCH
);
2151 d
->phys_map
= (PhysPageEntry
) { .ptr
= PHYS_MAP_NODE_NIL
, .skip
= 1 };
2153 as
->next_dispatch
= d
;
2156 static void address_space_dispatch_free(AddressSpaceDispatch
*d
)
2158 phys_sections_free(&d
->map
);
2162 static void mem_commit(MemoryListener
*listener
)
2164 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2165 AddressSpaceDispatch
*cur
= as
->dispatch
;
2166 AddressSpaceDispatch
*next
= as
->next_dispatch
;
2168 phys_page_compact_all(next
, next
->map
.nodes_nb
);
2170 atomic_rcu_set(&as
->dispatch
, next
);
2172 call_rcu(cur
, address_space_dispatch_free
, rcu
);
2176 static void tcg_commit(MemoryListener
*listener
)
2180 /* since each CPU stores ram addresses in its TLB cache, we must
2181 reset the modified entries */
2184 /* FIXME: Disentangle the cpu.h circular files deps so we can
2185 directly get the right CPU from listener. */
2186 if (cpu
->tcg_as_listener
!= listener
) {
2189 cpu_reload_memory_map(cpu
);
2193 void address_space_init_dispatch(AddressSpace
*as
)
2195 as
->dispatch
= NULL
;
2196 as
->dispatch_listener
= (MemoryListener
) {
2198 .commit
= mem_commit
,
2199 .region_add
= mem_add
,
2200 .region_nop
= mem_add
,
2203 memory_listener_register(&as
->dispatch_listener
, as
);
2206 void address_space_unregister(AddressSpace
*as
)
2208 memory_listener_unregister(&as
->dispatch_listener
);
2211 void address_space_destroy_dispatch(AddressSpace
*as
)
2213 AddressSpaceDispatch
*d
= as
->dispatch
;
2215 atomic_rcu_set(&as
->dispatch
, NULL
);
2217 call_rcu(d
, address_space_dispatch_free
, rcu
);
2221 static void memory_map_init(void)
2223 system_memory
= g_malloc(sizeof(*system_memory
));
2225 memory_region_init(system_memory
, NULL
, "system", UINT64_MAX
);
2226 address_space_init(&address_space_memory
, system_memory
, "memory");
2228 system_io
= g_malloc(sizeof(*system_io
));
2229 memory_region_init_io(system_io
, NULL
, &unassigned_io_ops
, NULL
, "io",
2231 address_space_init(&address_space_io
, system_io
, "I/O");
2234 MemoryRegion
*get_system_memory(void)
2236 return system_memory
;
2239 MemoryRegion
*get_system_io(void)
2244 #endif /* !defined(CONFIG_USER_ONLY) */
2246 /* physical memory access (slow version, mainly for debug) */
2247 #if defined(CONFIG_USER_ONLY)
2248 int cpu_memory_rw_debug(CPUState
*cpu
, target_ulong addr
,
2249 uint8_t *buf
, int len
, int is_write
)
2256 page
= addr
& TARGET_PAGE_MASK
;
2257 l
= (page
+ TARGET_PAGE_SIZE
) - addr
;
2260 flags
= page_get_flags(page
);
2261 if (!(flags
& PAGE_VALID
))
2264 if (!(flags
& PAGE_WRITE
))
2266 /* XXX: this code should not depend on lock_user */
2267 if (!(p
= lock_user(VERIFY_WRITE
, addr
, l
, 0)))
2270 unlock_user(p
, addr
, l
);
2272 if (!(flags
& PAGE_READ
))
2274 /* XXX: this code should not depend on lock_user */
2275 if (!(p
= lock_user(VERIFY_READ
, addr
, l
, 1)))
2278 unlock_user(p
, addr
, 0);
2289 static void invalidate_and_set_dirty(MemoryRegion
*mr
, hwaddr addr
,
2292 uint8_t dirty_log_mask
= memory_region_get_dirty_log_mask(mr
);
2293 /* No early return if dirty_log_mask is or becomes 0, because
2294 * cpu_physical_memory_set_dirty_range will still call
2295 * xen_modified_memory.
2297 if (dirty_log_mask
) {
2299 cpu_physical_memory_range_includes_clean(addr
, length
, dirty_log_mask
);
2301 if (dirty_log_mask
& (1 << DIRTY_MEMORY_CODE
)) {
2302 tb_invalidate_phys_range(addr
, addr
+ length
);
2303 dirty_log_mask
&= ~(1 << DIRTY_MEMORY_CODE
);
2305 cpu_physical_memory_set_dirty_range(addr
, length
, dirty_log_mask
);
2308 static int memory_access_size(MemoryRegion
*mr
, unsigned l
, hwaddr addr
)
2310 unsigned access_size_max
= mr
->ops
->valid
.max_access_size
;
2312 /* Regions are assumed to support 1-4 byte accesses unless
2313 otherwise specified. */
2314 if (access_size_max
== 0) {
2315 access_size_max
= 4;
2318 /* Bound the maximum access by the alignment of the address. */
2319 if (!mr
->ops
->impl
.unaligned
) {
2320 unsigned align_size_max
= addr
& -addr
;
2321 if (align_size_max
!= 0 && align_size_max
< access_size_max
) {
2322 access_size_max
= align_size_max
;
2326 /* Don't attempt accesses larger than the maximum. */
2327 if (l
> access_size_max
) {
2328 l
= access_size_max
;
2331 l
= 1 << (qemu_fls(l
) - 1);
2337 static bool prepare_mmio_access(MemoryRegion
*mr
)
2339 bool unlocked
= !qemu_mutex_iothread_locked();
2340 bool release_lock
= false;
2342 if (unlocked
&& mr
->global_locking
) {
2343 qemu_mutex_lock_iothread();
2345 release_lock
= true;
2347 if (mr
->flush_coalesced_mmio
) {
2349 qemu_mutex_lock_iothread();
2351 qemu_flush_coalesced_mmio_buffer();
2353 qemu_mutex_unlock_iothread();
2357 return release_lock
;
2360 MemTxResult
address_space_rw(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2361 uint8_t *buf
, int len
, bool is_write
)
2368 MemTxResult result
= MEMTX_OK
;
2369 bool release_lock
= false;
2374 mr
= address_space_translate(as
, addr
, &addr1
, &l
, is_write
);
2377 if (!memory_access_is_direct(mr
, is_write
)) {
2378 release_lock
|= prepare_mmio_access(mr
);
2379 l
= memory_access_size(mr
, l
, addr1
);
2380 /* XXX: could force current_cpu to NULL to avoid
2384 /* 64 bit write access */
2386 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 8,
2390 /* 32 bit write access */
2392 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 4,
2396 /* 16 bit write access */
2398 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 2,
2402 /* 8 bit write access */
2404 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 1,
2411 addr1
+= memory_region_get_ram_addr(mr
);
2413 ptr
= qemu_get_ram_ptr(addr1
);
2414 memcpy(ptr
, buf
, l
);
2415 invalidate_and_set_dirty(mr
, addr1
, l
);
2418 if (!memory_access_is_direct(mr
, is_write
)) {
2420 release_lock
|= prepare_mmio_access(mr
);
2421 l
= memory_access_size(mr
, l
, addr1
);
2424 /* 64 bit read access */
2425 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 8,
2430 /* 32 bit read access */
2431 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 4,
2436 /* 16 bit read access */
2437 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 2,
2442 /* 8 bit read access */
2443 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 1,
2452 ptr
= qemu_get_ram_ptr(mr
->ram_addr
+ addr1
);
2453 memcpy(buf
, ptr
, l
);
2458 qemu_mutex_unlock_iothread();
2459 release_lock
= false;
2471 MemTxResult
address_space_write(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2472 const uint8_t *buf
, int len
)
2474 return address_space_rw(as
, addr
, attrs
, (uint8_t *)buf
, len
, true);
2477 MemTxResult
address_space_read(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2478 uint8_t *buf
, int len
)
2480 return address_space_rw(as
, addr
, attrs
, buf
, len
, false);
2484 void cpu_physical_memory_rw(hwaddr addr
, uint8_t *buf
,
2485 int len
, int is_write
)
2487 address_space_rw(&address_space_memory
, addr
, MEMTXATTRS_UNSPECIFIED
,
2488 buf
, len
, is_write
);
2491 enum write_rom_type
{
2496 static inline void cpu_physical_memory_write_rom_internal(AddressSpace
*as
,
2497 hwaddr addr
, const uint8_t *buf
, int len
, enum write_rom_type type
)
2507 mr
= address_space_translate(as
, addr
, &addr1
, &l
, true);
2509 if (!(memory_region_is_ram(mr
) ||
2510 memory_region_is_romd(mr
))) {
2511 l
= memory_access_size(mr
, l
, addr1
);
2513 addr1
+= memory_region_get_ram_addr(mr
);
2515 ptr
= qemu_get_ram_ptr(addr1
);
2518 memcpy(ptr
, buf
, l
);
2519 invalidate_and_set_dirty(mr
, addr1
, l
);
2522 flush_icache_range((uintptr_t)ptr
, (uintptr_t)ptr
+ l
);
/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}

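/* A caller that has just patched guest code, for instance with
 * cpu_physical_memory_write_rom(), can make the change visible to KVM/Xen
 * vCPUs by flushing the host instruction cache over the same range
 * (illustrative sketch; 'gpa', 'code' and 'size' are placeholder names):
 *
 *     cpu_physical_memory_write_rom(&address_space_memory, gpa, code, size);
 *     cpu_flush_icache_range(gpa, size);
 */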
typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
    bool in_use;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    QEMUBH *bh;
    QLIST_ENTRY(MapClient) link;
} MapClient;

QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
    }
}

void cpu_register_map_client(QEMUBH *bh)
{
    MapClient *client = g_malloc(sizeof(*client));

    qemu_mutex_lock(&map_client_list_lock);
    client->bh = bh;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

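/* The registered bottom half is scheduled (and the client unregistered) as
 * soon as the single bounce buffer is released, so a typical caller retries
 * a failed mapping from its BH (illustrative sketch; 'retry_bh' stands for
 * the caller's own QEMUBH):
 *
 *     if (!address_space_map(as, addr, &plen, is_write)) {
 *         cpu_register_map_client(retry_bh);
 *     }
 */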
void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
    qemu_mutex_init(&map_client_list_lock);
}

void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &l, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        rcu_read_unlock();
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    rcu_read_unlock();
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}

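/* A mapping obtained from address_space_map() must be released with
 * address_space_unmap() below, passing the number of bytes that were really
 * accessed so the dirty bitmap can be updated for writes.  Illustrative
 * sketch ('gpa' and 'size' are placeholder names):
 *
 *     hwaddr plen = size;
 *     void *p = address_space_map(as, gpa, &plen, true);
 *     if (p) {
 *         memset(p, 0, plen);
 *         address_space_unmap(as, p, plen, true, plen);
 *     }
 */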
/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(mr, addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

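/* The *_phys() helpers above and below are thin convenience wrappers: they
 * target the caller-supplied AddressSpace with MEMTXATTRS_UNSPECIFIED and
 * discard the MemTxResult.  The unsuffixed forms use the target's native
 * endianness, while the _le/_be forms byte-swap as needed, e.g. (sketch;
 * 'gpa' is a placeholder name):
 *
 *     uint32_t v = ldl_le_phys(&address_space_memory, gpa);
 *
 * reads a little-endian 32-bit value regardless of TARGET_WORDS_BIGENDIAN.
 */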
/* warning: addr must be aligned */
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &val, 1, 0);
    if (result) {
        *result = r;
    }
    return val;
}

uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}

uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
                                MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    uint8_t dirty_log_mask;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

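/* stl_phys_notdirty() is intended for cases where the dirty bits are being
 * maintained from the value written itself, e.g. target MMU code setting
 * accessed/dirty flags in a guest PTE: the DIRTY_MEMORY_CODE bit is masked
 * out above, so translated code covering the page is not invalidated.
 */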
/* warning: addr must be aligned */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
        } else {
            address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

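/* cpu_memory_rw_debug() operates on guest virtual addresses by walking the
 * guest page tables via cpu_get_phys_page_debug().  Writes go through
 * cpu_physical_memory_write_rom() so that debuggers (such as the gdbstub)
 * can also patch pages the guest maps read-only.
 */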
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}

int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}
#endif