4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include <sys/types.h>
25 #include "qemu-common.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "exec/cputlb.h"
52 #include "translate-all.h"
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
57 #include "qemu/range.h"
59 //#define DEBUG_SUBPAGE
61 #if !defined(CONFIG_USER_ONLY)
62 static bool in_migration
;
64 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
65 * are protected by the ramlist lock.
67 RAMList ram_list
= { .blocks
= QLIST_HEAD_INITIALIZER(ram_list
.blocks
) };
69 static MemoryRegion
*system_memory
;
70 static MemoryRegion
*system_io
;
72 AddressSpace address_space_io
;
73 AddressSpace address_space_memory
;
75 MemoryRegion io_mem_rom
, io_mem_notdirty
;
76 static MemoryRegion io_mem_unassigned
;
78 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
79 #define RAM_PREALLOC (1 << 0)
81 /* RAM is mmap-ed with MAP_SHARED */
82 #define RAM_SHARED (1 << 1)
84 /* Only a portion of RAM (used_length) is actually used, and migrated.
85 * This used_length size can change across reboots.
87 #define RAM_RESIZEABLE (1 << 2)
91 struct CPUTailQ cpus
= QTAILQ_HEAD_INITIALIZER(cpus
);
92 /* current CPU in the current thread. It is only valid inside
94 DEFINE_TLS(CPUState
*, current_cpu
);
95 /* 0 = Do not count executed instructions.
96 1 = Precise instruction counting.
97 2 = Adaptive rate instruction counting. */
100 #if !defined(CONFIG_USER_ONLY)
102 typedef struct PhysPageEntry PhysPageEntry
;
104 struct PhysPageEntry
{
105 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
107 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
111 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
113 /* Size of the L2 (and L3, etc) page tables. */
114 #define ADDR_SPACE_BITS 64
117 #define P_L2_SIZE (1 << P_L2_BITS)
119 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
121 typedef PhysPageEntry Node
[P_L2_SIZE
];
123 typedef struct PhysPageMap
{
126 unsigned sections_nb
;
127 unsigned sections_nb_alloc
;
129 unsigned nodes_nb_alloc
;
131 MemoryRegionSection
*sections
;
134 struct AddressSpaceDispatch
{
137 /* This is a multi-level map on the physical address space.
138 * The bottom level has pointers to MemoryRegionSections.
140 PhysPageEntry phys_map
;
145 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
146 typedef struct subpage_t
{
150 uint16_t sub_section
[TARGET_PAGE_SIZE
];
153 #define PHYS_SECTION_UNASSIGNED 0
154 #define PHYS_SECTION_NOTDIRTY 1
155 #define PHYS_SECTION_ROM 2
156 #define PHYS_SECTION_WATCH 3
158 static void io_mem_init(void);
159 static void memory_map_init(void);
160 static void tcg_commit(MemoryListener
*listener
);
162 static MemoryRegion io_mem_watch
;
165 #if !defined(CONFIG_USER_ONLY)
167 static void phys_map_node_reserve(PhysPageMap
*map
, unsigned nodes
)
169 if (map
->nodes_nb
+ nodes
> map
->nodes_nb_alloc
) {
170 map
->nodes_nb_alloc
= MAX(map
->nodes_nb_alloc
* 2, 16);
171 map
->nodes_nb_alloc
= MAX(map
->nodes_nb_alloc
, map
->nodes_nb
+ nodes
);
172 map
->nodes
= g_renew(Node
, map
->nodes
, map
->nodes_nb_alloc
);
176 static uint32_t phys_map_node_alloc(PhysPageMap
*map
)
181 ret
= map
->nodes_nb
++;
182 assert(ret
!= PHYS_MAP_NODE_NIL
);
183 assert(ret
!= map
->nodes_nb_alloc
);
184 for (i
= 0; i
< P_L2_SIZE
; ++i
) {
185 map
->nodes
[ret
][i
].skip
= 1;
186 map
->nodes
[ret
][i
].ptr
= PHYS_MAP_NODE_NIL
;
191 static void phys_page_set_level(PhysPageMap
*map
, PhysPageEntry
*lp
,
192 hwaddr
*index
, hwaddr
*nb
, uint16_t leaf
,
197 hwaddr step
= (hwaddr
)1 << (level
* P_L2_BITS
);
199 if (lp
->skip
&& lp
->ptr
== PHYS_MAP_NODE_NIL
) {
200 lp
->ptr
= phys_map_node_alloc(map
);
201 p
= map
->nodes
[lp
->ptr
];
203 for (i
= 0; i
< P_L2_SIZE
; i
++) {
205 p
[i
].ptr
= PHYS_SECTION_UNASSIGNED
;
209 p
= map
->nodes
[lp
->ptr
];
211 lp
= &p
[(*index
>> (level
* P_L2_BITS
)) & (P_L2_SIZE
- 1)];
213 while (*nb
&& lp
< &p
[P_L2_SIZE
]) {
214 if ((*index
& (step
- 1)) == 0 && *nb
>= step
) {
220 phys_page_set_level(map
, lp
, index
, nb
, leaf
, level
- 1);
226 static void phys_page_set(AddressSpaceDispatch
*d
,
227 hwaddr index
, hwaddr nb
,
230 /* Wildly overreserve - it doesn't matter much. */
231 phys_map_node_reserve(&d
->map
, 3 * P_L2_LEVELS
);
233 phys_page_set_level(&d
->map
, &d
->phys_map
, &index
, &nb
, leaf
, P_L2_LEVELS
- 1);
236 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
237 * and update our entry so we can skip it and go directly to the destination.
239 static void phys_page_compact(PhysPageEntry
*lp
, Node
*nodes
, unsigned long *compacted
)
241 unsigned valid_ptr
= P_L2_SIZE
;
246 if (lp
->ptr
== PHYS_MAP_NODE_NIL
) {
251 for (i
= 0; i
< P_L2_SIZE
; i
++) {
252 if (p
[i
].ptr
== PHYS_MAP_NODE_NIL
) {
259 phys_page_compact(&p
[i
], nodes
, compacted
);
263 /* We can only compress if there's only one child. */
268 assert(valid_ptr
< P_L2_SIZE
);
270 /* Don't compress if it won't fit in the # of bits we have. */
271 if (lp
->skip
+ p
[valid_ptr
].skip
>= (1 << 3)) {
275 lp
->ptr
= p
[valid_ptr
].ptr
;
276 if (!p
[valid_ptr
].skip
) {
277 /* If our only child is a leaf, make this a leaf. */
278 /* By design, we should have made this node a leaf to begin with so we
279 * should never reach here.
280 * But since it's so simple to handle this, let's do it just in case we
285 lp
->skip
+= p
[valid_ptr
].skip
;
289 static void phys_page_compact_all(AddressSpaceDispatch
*d
, int nodes_nb
)
291 DECLARE_BITMAP(compacted
, nodes_nb
);
293 if (d
->phys_map
.skip
) {
294 phys_page_compact(&d
->phys_map
, d
->map
.nodes
, compacted
);
298 static MemoryRegionSection
*phys_page_find(PhysPageEntry lp
, hwaddr addr
,
299 Node
*nodes
, MemoryRegionSection
*sections
)
302 hwaddr index
= addr
>> TARGET_PAGE_BITS
;
305 for (i
= P_L2_LEVELS
; lp
.skip
&& (i
-= lp
.skip
) >= 0;) {
306 if (lp
.ptr
== PHYS_MAP_NODE_NIL
) {
307 return §ions
[PHYS_SECTION_UNASSIGNED
];
310 lp
= p
[(index
>> (i
* P_L2_BITS
)) & (P_L2_SIZE
- 1)];
313 if (sections
[lp
.ptr
].size
.hi
||
314 range_covers_byte(sections
[lp
.ptr
].offset_within_address_space
,
315 sections
[lp
.ptr
].size
.lo
, addr
)) {
316 return §ions
[lp
.ptr
];
318 return §ions
[PHYS_SECTION_UNASSIGNED
];
322 bool memory_region_is_unassigned(MemoryRegion
*mr
)
324 return mr
!= &io_mem_rom
&& mr
!= &io_mem_notdirty
&& !mr
->rom_device
325 && mr
!= &io_mem_watch
;
328 /* Called from RCU critical section */
329 static MemoryRegionSection
*address_space_lookup_region(AddressSpaceDispatch
*d
,
331 bool resolve_subpage
)
333 MemoryRegionSection
*section
;
336 section
= phys_page_find(d
->phys_map
, addr
, d
->map
.nodes
, d
->map
.sections
);
337 if (resolve_subpage
&& section
->mr
->subpage
) {
338 subpage
= container_of(section
->mr
, subpage_t
, iomem
);
339 section
= &d
->map
.sections
[subpage
->sub_section
[SUBPAGE_IDX(addr
)]];
344 /* Called from RCU critical section */
345 static MemoryRegionSection
*
346 address_space_translate_internal(AddressSpaceDispatch
*d
, hwaddr addr
, hwaddr
*xlat
,
347 hwaddr
*plen
, bool resolve_subpage
)
349 MemoryRegionSection
*section
;
352 section
= address_space_lookup_region(d
, addr
, resolve_subpage
);
353 /* Compute offset within MemoryRegionSection */
354 addr
-= section
->offset_within_address_space
;
356 /* Compute offset within MemoryRegion */
357 *xlat
= addr
+ section
->offset_within_region
;
359 diff
= int128_sub(section
->mr
->size
, int128_make64(addr
));
360 *plen
= int128_get64(int128_min(diff
, int128_make64(*plen
)));
364 static inline bool memory_access_is_direct(MemoryRegion
*mr
, bool is_write
)
366 if (memory_region_is_ram(mr
)) {
367 return !(is_write
&& mr
->readonly
);
369 if (memory_region_is_romd(mr
)) {
376 /* Called from RCU critical section */
377 MemoryRegion
*address_space_translate(AddressSpace
*as
, hwaddr addr
,
378 hwaddr
*xlat
, hwaddr
*plen
,
382 MemoryRegionSection
*section
;
386 AddressSpaceDispatch
*d
= atomic_rcu_read(&as
->dispatch
);
387 section
= address_space_translate_internal(d
, addr
, &addr
, plen
, true);
390 if (!mr
->iommu_ops
) {
394 iotlb
= mr
->iommu_ops
->translate(mr
, addr
, is_write
);
395 addr
= ((iotlb
.translated_addr
& ~iotlb
.addr_mask
)
396 | (addr
& iotlb
.addr_mask
));
397 *plen
= MIN(*plen
, (addr
| iotlb
.addr_mask
) - addr
+ 1);
398 if (!(iotlb
.perm
& (1 << is_write
))) {
399 mr
= &io_mem_unassigned
;
403 as
= iotlb
.target_as
;
406 if (xen_enabled() && memory_access_is_direct(mr
, is_write
)) {
407 hwaddr page
= ((addr
& TARGET_PAGE_MASK
) + TARGET_PAGE_SIZE
) - addr
;
408 *plen
= MIN(page
, *plen
);
415 /* Called from RCU critical section */
416 MemoryRegionSection
*
417 address_space_translate_for_iotlb(CPUState
*cpu
, hwaddr addr
,
418 hwaddr
*xlat
, hwaddr
*plen
)
420 MemoryRegionSection
*section
;
421 section
= address_space_translate_internal(cpu
->memory_dispatch
,
422 addr
, xlat
, plen
, false);
424 assert(!section
->mr
->iommu_ops
);
429 #if !defined(CONFIG_USER_ONLY)
431 static int cpu_common_post_load(void *opaque
, int version_id
)
433 CPUState
*cpu
= opaque
;
435 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
436 version_id is increased. */
437 cpu
->interrupt_request
&= ~0x01;
443 static int cpu_common_pre_load(void *opaque
)
445 CPUState
*cpu
= opaque
;
447 cpu
->exception_index
= -1;
452 static bool cpu_common_exception_index_needed(void *opaque
)
454 CPUState
*cpu
= opaque
;
456 return tcg_enabled() && cpu
->exception_index
!= -1;
459 static const VMStateDescription vmstate_cpu_common_exception_index
= {
460 .name
= "cpu_common/exception_index",
462 .minimum_version_id
= 1,
463 .fields
= (VMStateField
[]) {
464 VMSTATE_INT32(exception_index
, CPUState
),
465 VMSTATE_END_OF_LIST()
469 const VMStateDescription vmstate_cpu_common
= {
470 .name
= "cpu_common",
472 .minimum_version_id
= 1,
473 .pre_load
= cpu_common_pre_load
,
474 .post_load
= cpu_common_post_load
,
475 .fields
= (VMStateField
[]) {
476 VMSTATE_UINT32(halted
, CPUState
),
477 VMSTATE_UINT32(interrupt_request
, CPUState
),
478 VMSTATE_END_OF_LIST()
480 .subsections
= (VMStateSubsection
[]) {
482 .vmsd
= &vmstate_cpu_common_exception_index
,
483 .needed
= cpu_common_exception_index_needed
,
492 CPUState
*qemu_get_cpu(int index
)
497 if (cpu
->cpu_index
== index
) {
505 #if !defined(CONFIG_USER_ONLY)
506 void tcg_cpu_address_space_init(CPUState
*cpu
, AddressSpace
*as
)
508 /* We only support one address space per cpu at the moment. */
509 assert(cpu
->as
== as
);
511 if (cpu
->tcg_as_listener
) {
512 memory_listener_unregister(cpu
->tcg_as_listener
);
514 cpu
->tcg_as_listener
= g_new0(MemoryListener
, 1);
516 cpu
->tcg_as_listener
->commit
= tcg_commit
;
517 memory_listener_register(cpu
->tcg_as_listener
, as
);
521 void cpu_exec_init(CPUArchState
*env
)
523 CPUState
*cpu
= ENV_GET_CPU(env
);
524 CPUClass
*cc
= CPU_GET_CLASS(cpu
);
528 #if defined(CONFIG_USER_ONLY)
532 CPU_FOREACH(some_cpu
) {
535 cpu
->cpu_index
= cpu_index
;
537 QTAILQ_INIT(&cpu
->breakpoints
);
538 QTAILQ_INIT(&cpu
->watchpoints
);
539 #ifndef CONFIG_USER_ONLY
540 cpu
->as
= &address_space_memory
;
541 cpu
->thread_id
= qemu_get_thread_id();
542 cpu_reload_memory_map(cpu
);
544 QTAILQ_INSERT_TAIL(&cpus
, cpu
, node
);
545 #if defined(CONFIG_USER_ONLY)
548 if (qdev_get_vmsd(DEVICE(cpu
)) == NULL
) {
549 vmstate_register(NULL
, cpu_index
, &vmstate_cpu_common
, cpu
);
551 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
552 register_savevm(NULL
, "cpu", cpu_index
, CPU_SAVE_VERSION
,
553 cpu_save
, cpu_load
, env
);
554 assert(cc
->vmsd
== NULL
);
555 assert(qdev_get_vmsd(DEVICE(cpu
)) == NULL
);
557 if (cc
->vmsd
!= NULL
) {
558 vmstate_register(NULL
, cpu_index
, cc
->vmsd
, cpu
);
562 #if defined(CONFIG_USER_ONLY)
563 static void breakpoint_invalidate(CPUState
*cpu
, target_ulong pc
)
565 tb_invalidate_phys_page_range(pc
, pc
+ 1, 0);
568 static void breakpoint_invalidate(CPUState
*cpu
, target_ulong pc
)
570 hwaddr phys
= cpu_get_phys_page_debug(cpu
, pc
);
572 tb_invalidate_phys_addr(cpu
->as
,
573 phys
| (pc
& ~TARGET_PAGE_MASK
));
578 #if defined(CONFIG_USER_ONLY)
579 void cpu_watchpoint_remove_all(CPUState
*cpu
, int mask
)
584 int cpu_watchpoint_remove(CPUState
*cpu
, vaddr addr
, vaddr len
,
590 void cpu_watchpoint_remove_by_ref(CPUState
*cpu
, CPUWatchpoint
*watchpoint
)
594 int cpu_watchpoint_insert(CPUState
*cpu
, vaddr addr
, vaddr len
,
595 int flags
, CPUWatchpoint
**watchpoint
)
600 /* Add a watchpoint. */
601 int cpu_watchpoint_insert(CPUState
*cpu
, vaddr addr
, vaddr len
,
602 int flags
, CPUWatchpoint
**watchpoint
)
606 /* forbid ranges which are empty or run off the end of the address space */
607 if (len
== 0 || (addr
+ len
- 1) < addr
) {
608 error_report("tried to set invalid watchpoint at %"
609 VADDR_PRIx
", len=%" VADDR_PRIu
, addr
, len
);
612 wp
= g_malloc(sizeof(*wp
));
618 /* keep all GDB-injected watchpoints in front */
619 if (flags
& BP_GDB
) {
620 QTAILQ_INSERT_HEAD(&cpu
->watchpoints
, wp
, entry
);
622 QTAILQ_INSERT_TAIL(&cpu
->watchpoints
, wp
, entry
);
625 tlb_flush_page(cpu
, addr
);
632 /* Remove a specific watchpoint. */
633 int cpu_watchpoint_remove(CPUState
*cpu
, vaddr addr
, vaddr len
,
638 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
639 if (addr
== wp
->vaddr
&& len
== wp
->len
640 && flags
== (wp
->flags
& ~BP_WATCHPOINT_HIT
)) {
641 cpu_watchpoint_remove_by_ref(cpu
, wp
);
648 /* Remove a specific watchpoint by reference. */
649 void cpu_watchpoint_remove_by_ref(CPUState
*cpu
, CPUWatchpoint
*watchpoint
)
651 QTAILQ_REMOVE(&cpu
->watchpoints
, watchpoint
, entry
);
653 tlb_flush_page(cpu
, watchpoint
->vaddr
);
658 /* Remove all matching watchpoints. */
659 void cpu_watchpoint_remove_all(CPUState
*cpu
, int mask
)
661 CPUWatchpoint
*wp
, *next
;
663 QTAILQ_FOREACH_SAFE(wp
, &cpu
->watchpoints
, entry
, next
) {
664 if (wp
->flags
& mask
) {
665 cpu_watchpoint_remove_by_ref(cpu
, wp
);
670 /* Return true if this watchpoint address matches the specified
671 * access (ie the address range covered by the watchpoint overlaps
672 * partially or completely with the address range covered by the
675 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint
*wp
,
679 /* We know the lengths are non-zero, but a little caution is
680 * required to avoid errors in the case where the range ends
681 * exactly at the top of the address space and so addr + len
682 * wraps round to zero.
684 vaddr wpend
= wp
->vaddr
+ wp
->len
- 1;
685 vaddr addrend
= addr
+ len
- 1;
687 return !(addr
> wpend
|| wp
->vaddr
> addrend
);
692 /* Add a breakpoint. */
693 int cpu_breakpoint_insert(CPUState
*cpu
, vaddr pc
, int flags
,
694 CPUBreakpoint
**breakpoint
)
698 bp
= g_malloc(sizeof(*bp
));
703 /* keep all GDB-injected breakpoints in front */
704 if (flags
& BP_GDB
) {
705 QTAILQ_INSERT_HEAD(&cpu
->breakpoints
, bp
, entry
);
707 QTAILQ_INSERT_TAIL(&cpu
->breakpoints
, bp
, entry
);
710 breakpoint_invalidate(cpu
, pc
);
718 /* Remove a specific breakpoint. */
719 int cpu_breakpoint_remove(CPUState
*cpu
, vaddr pc
, int flags
)
723 QTAILQ_FOREACH(bp
, &cpu
->breakpoints
, entry
) {
724 if (bp
->pc
== pc
&& bp
->flags
== flags
) {
725 cpu_breakpoint_remove_by_ref(cpu
, bp
);
732 /* Remove a specific breakpoint by reference. */
733 void cpu_breakpoint_remove_by_ref(CPUState
*cpu
, CPUBreakpoint
*breakpoint
)
735 QTAILQ_REMOVE(&cpu
->breakpoints
, breakpoint
, entry
);
737 breakpoint_invalidate(cpu
, breakpoint
->pc
);
742 /* Remove all matching breakpoints. */
743 void cpu_breakpoint_remove_all(CPUState
*cpu
, int mask
)
745 CPUBreakpoint
*bp
, *next
;
747 QTAILQ_FOREACH_SAFE(bp
, &cpu
->breakpoints
, entry
, next
) {
748 if (bp
->flags
& mask
) {
749 cpu_breakpoint_remove_by_ref(cpu
, bp
);
754 /* enable or disable single step mode. EXCP_DEBUG is returned by the
755 CPU loop after each instruction */
756 void cpu_single_step(CPUState
*cpu
, int enabled
)
758 if (cpu
->singlestep_enabled
!= enabled
) {
759 cpu
->singlestep_enabled
= enabled
;
761 kvm_update_guest_debug(cpu
, 0);
763 /* must flush all the translated code to avoid inconsistencies */
764 /* XXX: only flush what is necessary */
765 CPUArchState
*env
= cpu
->env_ptr
;
771 void cpu_abort(CPUState
*cpu
, const char *fmt
, ...)
778 fprintf(stderr
, "qemu: fatal: ");
779 vfprintf(stderr
, fmt
, ap
);
780 fprintf(stderr
, "\n");
781 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
| CPU_DUMP_CCOP
);
782 if (qemu_log_enabled()) {
783 qemu_log("qemu: fatal: ");
784 qemu_log_vprintf(fmt
, ap2
);
786 log_cpu_state(cpu
, CPU_DUMP_FPU
| CPU_DUMP_CCOP
);
792 #if defined(CONFIG_USER_ONLY)
794 struct sigaction act
;
795 sigfillset(&act
.sa_mask
);
796 act
.sa_handler
= SIG_DFL
;
797 sigaction(SIGABRT
, &act
, NULL
);
803 #if !defined(CONFIG_USER_ONLY)
804 /* Called from RCU critical section */
805 static RAMBlock
*qemu_get_ram_block(ram_addr_t addr
)
809 block
= atomic_rcu_read(&ram_list
.mru_block
);
810 if (block
&& addr
- block
->offset
< block
->max_length
) {
813 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
814 if (addr
- block
->offset
< block
->max_length
) {
819 fprintf(stderr
, "Bad ram offset %" PRIx64
"\n", (uint64_t)addr
);
823 /* It is safe to write mru_block outside the iothread lock. This
828 * xxx removed from list
832 * call_rcu(reclaim_ramblock, xxx);
835 * atomic_rcu_set is not needed here. The block was already published
836 * when it was placed into the list. Here we're just making an extra
837 * copy of the pointer.
839 ram_list
.mru_block
= block
;
843 static void tlb_reset_dirty_range_all(ram_addr_t start
, ram_addr_t length
)
849 end
= TARGET_PAGE_ALIGN(start
+ length
);
850 start
&= TARGET_PAGE_MASK
;
853 block
= qemu_get_ram_block(start
);
854 assert(block
== qemu_get_ram_block(end
- 1));
855 start1
= (uintptr_t)ramblock_ptr(block
, start
- block
->offset
);
856 cpu_tlb_reset_dirty_all(start1
, length
);
860 /* Note: start and end must be within the same ram block. */
861 void cpu_physical_memory_reset_dirty(ram_addr_t start
, ram_addr_t length
,
866 cpu_physical_memory_clear_dirty_range_type(start
, length
, client
);
869 tlb_reset_dirty_range_all(start
, length
);
873 static void cpu_physical_memory_set_dirty_tracking(bool enable
)
875 in_migration
= enable
;
878 /* Called from RCU critical section */
879 hwaddr
memory_region_section_get_iotlb(CPUState
*cpu
,
880 MemoryRegionSection
*section
,
882 hwaddr paddr
, hwaddr xlat
,
884 target_ulong
*address
)
889 if (memory_region_is_ram(section
->mr
)) {
891 iotlb
= (memory_region_get_ram_addr(section
->mr
) & TARGET_PAGE_MASK
)
893 if (!section
->readonly
) {
894 iotlb
|= PHYS_SECTION_NOTDIRTY
;
896 iotlb
|= PHYS_SECTION_ROM
;
899 iotlb
= section
- section
->address_space
->dispatch
->map
.sections
;
903 /* Make accesses to pages with watchpoints go via the
904 watchpoint trap routines. */
905 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
906 if (cpu_watchpoint_address_matches(wp
, vaddr
, TARGET_PAGE_SIZE
)) {
907 /* Avoid trapping reads of pages with a write breakpoint. */
908 if ((prot
& PAGE_WRITE
) || (wp
->flags
& BP_MEM_READ
)) {
909 iotlb
= PHYS_SECTION_WATCH
+ paddr
;
910 *address
|= TLB_MMIO
;
918 #endif /* defined(CONFIG_USER_ONLY) */
920 #if !defined(CONFIG_USER_ONLY)
922 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
924 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
);
926 static void *(*phys_mem_alloc
)(size_t size
, uint64_t *align
) =
930 * Set a custom physical guest memory alloator.
931 * Accelerators with unusual needs may need this. Hopefully, we can
932 * get rid of it eventually.
934 void phys_mem_set_alloc(void *(*alloc
)(size_t, uint64_t *align
))
936 phys_mem_alloc
= alloc
;
939 static uint16_t phys_section_add(PhysPageMap
*map
,
940 MemoryRegionSection
*section
)
942 /* The physical section number is ORed with a page-aligned
943 * pointer to produce the iotlb entries. Thus it should
944 * never overflow into the page-aligned value.
946 assert(map
->sections_nb
< TARGET_PAGE_SIZE
);
948 if (map
->sections_nb
== map
->sections_nb_alloc
) {
949 map
->sections_nb_alloc
= MAX(map
->sections_nb_alloc
* 2, 16);
950 map
->sections
= g_renew(MemoryRegionSection
, map
->sections
,
951 map
->sections_nb_alloc
);
953 map
->sections
[map
->sections_nb
] = *section
;
954 memory_region_ref(section
->mr
);
955 return map
->sections_nb
++;
958 static void phys_section_destroy(MemoryRegion
*mr
)
960 memory_region_unref(mr
);
963 subpage_t
*subpage
= container_of(mr
, subpage_t
, iomem
);
964 object_unref(OBJECT(&subpage
->iomem
));
969 static void phys_sections_free(PhysPageMap
*map
)
971 while (map
->sections_nb
> 0) {
972 MemoryRegionSection
*section
= &map
->sections
[--map
->sections_nb
];
973 phys_section_destroy(section
->mr
);
975 g_free(map
->sections
);
979 static void register_subpage(AddressSpaceDispatch
*d
, MemoryRegionSection
*section
)
982 hwaddr base
= section
->offset_within_address_space
984 MemoryRegionSection
*existing
= phys_page_find(d
->phys_map
, base
,
985 d
->map
.nodes
, d
->map
.sections
);
986 MemoryRegionSection subsection
= {
987 .offset_within_address_space
= base
,
988 .size
= int128_make64(TARGET_PAGE_SIZE
),
992 assert(existing
->mr
->subpage
|| existing
->mr
== &io_mem_unassigned
);
994 if (!(existing
->mr
->subpage
)) {
995 subpage
= subpage_init(d
->as
, base
);
996 subsection
.address_space
= d
->as
;
997 subsection
.mr
= &subpage
->iomem
;
998 phys_page_set(d
, base
>> TARGET_PAGE_BITS
, 1,
999 phys_section_add(&d
->map
, &subsection
));
1001 subpage
= container_of(existing
->mr
, subpage_t
, iomem
);
1003 start
= section
->offset_within_address_space
& ~TARGET_PAGE_MASK
;
1004 end
= start
+ int128_get64(section
->size
) - 1;
1005 subpage_register(subpage
, start
, end
,
1006 phys_section_add(&d
->map
, section
));
1010 static void register_multipage(AddressSpaceDispatch
*d
,
1011 MemoryRegionSection
*section
)
1013 hwaddr start_addr
= section
->offset_within_address_space
;
1014 uint16_t section_index
= phys_section_add(&d
->map
, section
);
1015 uint64_t num_pages
= int128_get64(int128_rshift(section
->size
,
1019 phys_page_set(d
, start_addr
>> TARGET_PAGE_BITS
, num_pages
, section_index
);
1022 static void mem_add(MemoryListener
*listener
, MemoryRegionSection
*section
)
1024 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
1025 AddressSpaceDispatch
*d
= as
->next_dispatch
;
1026 MemoryRegionSection now
= *section
, remain
= *section
;
1027 Int128 page_size
= int128_make64(TARGET_PAGE_SIZE
);
1029 if (now
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1030 uint64_t left
= TARGET_PAGE_ALIGN(now
.offset_within_address_space
)
1031 - now
.offset_within_address_space
;
1033 now
.size
= int128_min(int128_make64(left
), now
.size
);
1034 register_subpage(d
, &now
);
1036 now
.size
= int128_zero();
1038 while (int128_ne(remain
.size
, now
.size
)) {
1039 remain
.size
= int128_sub(remain
.size
, now
.size
);
1040 remain
.offset_within_address_space
+= int128_get64(now
.size
);
1041 remain
.offset_within_region
+= int128_get64(now
.size
);
1043 if (int128_lt(remain
.size
, page_size
)) {
1044 register_subpage(d
, &now
);
1045 } else if (remain
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1046 now
.size
= page_size
;
1047 register_subpage(d
, &now
);
1049 now
.size
= int128_and(now
.size
, int128_neg(page_size
));
1050 register_multipage(d
, &now
);
1055 void qemu_flush_coalesced_mmio_buffer(void)
1058 kvm_flush_coalesced_mmio_buffer();
1061 void qemu_mutex_lock_ramlist(void)
1063 qemu_mutex_lock(&ram_list
.mutex
);
1066 void qemu_mutex_unlock_ramlist(void)
1068 qemu_mutex_unlock(&ram_list
.mutex
);
1073 #include <sys/vfs.h>
1075 #define HUGETLBFS_MAGIC 0x958458f6
1077 static long gethugepagesize(const char *path
, Error
**errp
)
1083 ret
= statfs(path
, &fs
);
1084 } while (ret
!= 0 && errno
== EINTR
);
1087 error_setg_errno(errp
, errno
, "failed to get page size of file %s",
1092 if (fs
.f_type
!= HUGETLBFS_MAGIC
)
1093 fprintf(stderr
, "Warning: path not on HugeTLBFS: %s\n", path
);
1098 static void *file_ram_alloc(RAMBlock
*block
,
1104 char *sanitized_name
;
1109 Error
*local_err
= NULL
;
1111 hpagesize
= gethugepagesize(path
, &local_err
);
1113 error_propagate(errp
, local_err
);
1116 block
->mr
->align
= hpagesize
;
1118 if (memory
< hpagesize
) {
1119 error_setg(errp
, "memory size 0x" RAM_ADDR_FMT
" must be equal to "
1120 "or larger than huge page size 0x%" PRIx64
,
1125 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1127 "host lacks kvm mmu notifiers, -mem-path unsupported");
1131 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1132 sanitized_name
= g_strdup(memory_region_name(block
->mr
));
1133 for (c
= sanitized_name
; *c
!= '\0'; c
++) {
1138 filename
= g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path
,
1140 g_free(sanitized_name
);
1142 fd
= mkstemp(filename
);
1144 error_setg_errno(errp
, errno
,
1145 "unable to create backing store for hugepages");
1152 memory
= (memory
+hpagesize
-1) & ~(hpagesize
-1);
1155 * ftruncate is not supported by hugetlbfs in older
1156 * hosts, so don't bother bailing out on errors.
1157 * If anything goes wrong with it under other filesystems,
1160 if (ftruncate(fd
, memory
)) {
1161 perror("ftruncate");
1164 area
= mmap(0, memory
, PROT_READ
| PROT_WRITE
,
1165 (block
->flags
& RAM_SHARED
? MAP_SHARED
: MAP_PRIVATE
),
1167 if (area
== MAP_FAILED
) {
1168 error_setg_errno(errp
, errno
,
1169 "unable to map backing store for hugepages");
1175 os_mem_prealloc(fd
, area
, memory
);
1183 error_report("%s", error_get_pretty(*errp
));
1190 /* Called with the ramlist lock held. */
1191 static ram_addr_t
find_ram_offset(ram_addr_t size
)
1193 RAMBlock
*block
, *next_block
;
1194 ram_addr_t offset
= RAM_ADDR_MAX
, mingap
= RAM_ADDR_MAX
;
1196 assert(size
!= 0); /* it would hand out same offset multiple times */
1198 if (QLIST_EMPTY_RCU(&ram_list
.blocks
)) {
1202 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1203 ram_addr_t end
, next
= RAM_ADDR_MAX
;
1205 end
= block
->offset
+ block
->max_length
;
1207 QLIST_FOREACH_RCU(next_block
, &ram_list
.blocks
, next
) {
1208 if (next_block
->offset
>= end
) {
1209 next
= MIN(next
, next_block
->offset
);
1212 if (next
- end
>= size
&& next
- end
< mingap
) {
1214 mingap
= next
- end
;
1218 if (offset
== RAM_ADDR_MAX
) {
1219 fprintf(stderr
, "Failed to find gap of requested size: %" PRIu64
"\n",
1227 ram_addr_t
last_ram_offset(void)
1230 ram_addr_t last
= 0;
1233 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1234 last
= MAX(last
, block
->offset
+ block
->max_length
);
1240 static void qemu_ram_setup_dump(void *addr
, ram_addr_t size
)
1244 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1245 if (!machine_dump_guest_core(current_machine
)) {
1246 ret
= qemu_madvise(addr
, size
, QEMU_MADV_DONTDUMP
);
1248 perror("qemu_madvise");
1249 fprintf(stderr
, "madvise doesn't support MADV_DONTDUMP, "
1250 "but dump_guest_core=off specified\n");
1255 /* Called within an RCU critical section, or while the ramlist lock
1258 static RAMBlock
*find_ram_block(ram_addr_t addr
)
1262 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1263 if (block
->offset
== addr
) {
1271 /* Called with iothread lock held. */
1272 void qemu_ram_set_idstr(ram_addr_t addr
, const char *name
, DeviceState
*dev
)
1274 RAMBlock
*new_block
, *block
;
1277 new_block
= find_ram_block(addr
);
1279 assert(!new_block
->idstr
[0]);
1282 char *id
= qdev_get_dev_path(dev
);
1284 snprintf(new_block
->idstr
, sizeof(new_block
->idstr
), "%s/", id
);
1288 pstrcat(new_block
->idstr
, sizeof(new_block
->idstr
), name
);
1290 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1291 if (block
!= new_block
&& !strcmp(block
->idstr
, new_block
->idstr
)) {
1292 fprintf(stderr
, "RAMBlock \"%s\" already registered, abort!\n",
1300 /* Called with iothread lock held. */
1301 void qemu_ram_unset_idstr(ram_addr_t addr
)
1305 /* FIXME: arch_init.c assumes that this is not called throughout
1306 * migration. Ignore the problem since hot-unplug during migration
1307 * does not work anyway.
1311 block
= find_ram_block(addr
);
1313 memset(block
->idstr
, 0, sizeof(block
->idstr
));
1318 static int memory_try_enable_merging(void *addr
, size_t len
)
1320 if (!machine_mem_merge(current_machine
)) {
1321 /* disabled by the user */
1325 return qemu_madvise(addr
, len
, QEMU_MADV_MERGEABLE
);
1328 /* Only legal before guest might have detected the memory size: e.g. on
1329 * incoming migration, or right after reset.
1331 * As memory core doesn't know how is memory accessed, it is up to
1332 * resize callback to update device state and/or add assertions to detect
1333 * misuse, if necessary.
1335 int qemu_ram_resize(ram_addr_t base
, ram_addr_t newsize
, Error
**errp
)
1337 RAMBlock
*block
= find_ram_block(base
);
1341 newsize
= TARGET_PAGE_ALIGN(newsize
);
1343 if (block
->used_length
== newsize
) {
1347 if (!(block
->flags
& RAM_RESIZEABLE
)) {
1348 error_setg_errno(errp
, EINVAL
,
1349 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1350 " in != 0x" RAM_ADDR_FMT
, block
->idstr
,
1351 newsize
, block
->used_length
);
1355 if (block
->max_length
< newsize
) {
1356 error_setg_errno(errp
, EINVAL
,
1357 "Length too large: %s: 0x" RAM_ADDR_FMT
1358 " > 0x" RAM_ADDR_FMT
, block
->idstr
,
1359 newsize
, block
->max_length
);
1363 cpu_physical_memory_clear_dirty_range(block
->offset
, block
->used_length
);
1364 block
->used_length
= newsize
;
1365 cpu_physical_memory_set_dirty_range(block
->offset
, block
->used_length
);
1366 memory_region_set_size(block
->mr
, newsize
);
1367 if (block
->resized
) {
1368 block
->resized(block
->idstr
, newsize
, block
->host
);
1373 static ram_addr_t
ram_block_add(RAMBlock
*new_block
, Error
**errp
)
1376 RAMBlock
*last_block
= NULL
;
1377 ram_addr_t old_ram_size
, new_ram_size
;
1379 old_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1381 qemu_mutex_lock_ramlist();
1382 new_block
->offset
= find_ram_offset(new_block
->max_length
);
1384 if (!new_block
->host
) {
1385 if (xen_enabled()) {
1386 xen_ram_alloc(new_block
->offset
, new_block
->max_length
,
1389 new_block
->host
= phys_mem_alloc(new_block
->max_length
,
1390 &new_block
->mr
->align
);
1391 if (!new_block
->host
) {
1392 error_setg_errno(errp
, errno
,
1393 "cannot set up guest memory '%s'",
1394 memory_region_name(new_block
->mr
));
1395 qemu_mutex_unlock_ramlist();
1398 memory_try_enable_merging(new_block
->host
, new_block
->max_length
);
1402 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1403 * QLIST (which has an RCU-friendly variant) does not have insertion at
1404 * tail, so save the last element in last_block.
1406 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1408 if (block
->max_length
< new_block
->max_length
) {
1413 QLIST_INSERT_BEFORE_RCU(block
, new_block
, next
);
1414 } else if (last_block
) {
1415 QLIST_INSERT_AFTER_RCU(last_block
, new_block
, next
);
1416 } else { /* list is empty */
1417 QLIST_INSERT_HEAD_RCU(&ram_list
.blocks
, new_block
, next
);
1419 ram_list
.mru_block
= NULL
;
1421 /* Write list before version */
1424 qemu_mutex_unlock_ramlist();
1426 new_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1428 if (new_ram_size
> old_ram_size
) {
1431 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1432 for (i
= 0; i
< DIRTY_MEMORY_NUM
; i
++) {
1433 ram_list
.dirty_memory
[i
] =
1434 bitmap_zero_extend(ram_list
.dirty_memory
[i
],
1435 old_ram_size
, new_ram_size
);
1438 cpu_physical_memory_set_dirty_range(new_block
->offset
,
1439 new_block
->used_length
);
1441 if (new_block
->host
) {
1442 qemu_ram_setup_dump(new_block
->host
, new_block
->max_length
);
1443 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_HUGEPAGE
);
1444 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_DONTFORK
);
1445 if (kvm_enabled()) {
1446 kvm_setup_guest_memory(new_block
->host
, new_block
->max_length
);
1450 return new_block
->offset
;
1454 ram_addr_t
qemu_ram_alloc_from_file(ram_addr_t size
, MemoryRegion
*mr
,
1455 bool share
, const char *mem_path
,
1458 RAMBlock
*new_block
;
1460 Error
*local_err
= NULL
;
1462 if (xen_enabled()) {
1463 error_setg(errp
, "-mem-path not supported with Xen");
1467 if (phys_mem_alloc
!= qemu_anon_ram_alloc
) {
1469 * file_ram_alloc() needs to allocate just like
1470 * phys_mem_alloc, but we haven't bothered to provide
1474 "-mem-path not supported with this accelerator");
1478 size
= TARGET_PAGE_ALIGN(size
);
1479 new_block
= g_malloc0(sizeof(*new_block
));
1481 new_block
->used_length
= size
;
1482 new_block
->max_length
= size
;
1483 new_block
->flags
= share
? RAM_SHARED
: 0;
1484 new_block
->host
= file_ram_alloc(new_block
, size
,
1486 if (!new_block
->host
) {
1491 addr
= ram_block_add(new_block
, &local_err
);
1494 error_propagate(errp
, local_err
);
1502 ram_addr_t
qemu_ram_alloc_internal(ram_addr_t size
, ram_addr_t max_size
,
1503 void (*resized
)(const char*,
1506 void *host
, bool resizeable
,
1507 MemoryRegion
*mr
, Error
**errp
)
1509 RAMBlock
*new_block
;
1511 Error
*local_err
= NULL
;
1513 size
= TARGET_PAGE_ALIGN(size
);
1514 max_size
= TARGET_PAGE_ALIGN(max_size
);
1515 new_block
= g_malloc0(sizeof(*new_block
));
1517 new_block
->resized
= resized
;
1518 new_block
->used_length
= size
;
1519 new_block
->max_length
= max_size
;
1520 assert(max_size
>= size
);
1522 new_block
->host
= host
;
1524 new_block
->flags
|= RAM_PREALLOC
;
1527 new_block
->flags
|= RAM_RESIZEABLE
;
1529 addr
= ram_block_add(new_block
, &local_err
);
1532 error_propagate(errp
, local_err
);
1538 ram_addr_t
qemu_ram_alloc_from_ptr(ram_addr_t size
, void *host
,
1539 MemoryRegion
*mr
, Error
**errp
)
1541 return qemu_ram_alloc_internal(size
, size
, NULL
, host
, false, mr
, errp
);
1544 ram_addr_t
qemu_ram_alloc(ram_addr_t size
, MemoryRegion
*mr
, Error
**errp
)
1546 return qemu_ram_alloc_internal(size
, size
, NULL
, NULL
, false, mr
, errp
);
1549 ram_addr_t
qemu_ram_alloc_resizeable(ram_addr_t size
, ram_addr_t maxsz
,
1550 void (*resized
)(const char*,
1553 MemoryRegion
*mr
, Error
**errp
)
1555 return qemu_ram_alloc_internal(size
, maxsz
, resized
, NULL
, true, mr
, errp
);
1558 void qemu_ram_free_from_ptr(ram_addr_t addr
)
1562 qemu_mutex_lock_ramlist();
1563 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1564 if (addr
== block
->offset
) {
1565 QLIST_REMOVE_RCU(block
, next
);
1566 ram_list
.mru_block
= NULL
;
1567 /* Write list before version */
1570 g_free_rcu(block
, rcu
);
1574 qemu_mutex_unlock_ramlist();
1577 static void reclaim_ramblock(RAMBlock
*block
)
1579 if (block
->flags
& RAM_PREALLOC
) {
1581 } else if (xen_enabled()) {
1582 xen_invalidate_map_cache_entry(block
->host
);
1584 } else if (block
->fd
>= 0) {
1585 munmap(block
->host
, block
->max_length
);
1589 qemu_anon_ram_free(block
->host
, block
->max_length
);
1594 void qemu_ram_free(ram_addr_t addr
)
1598 qemu_mutex_lock_ramlist();
1599 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1600 if (addr
== block
->offset
) {
1601 QLIST_REMOVE_RCU(block
, next
);
1602 ram_list
.mru_block
= NULL
;
1603 /* Write list before version */
1606 call_rcu(block
, reclaim_ramblock
, rcu
);
1610 qemu_mutex_unlock_ramlist();
1614 void qemu_ram_remap(ram_addr_t addr
, ram_addr_t length
)
1621 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1622 offset
= addr
- block
->offset
;
1623 if (offset
< block
->max_length
) {
1624 vaddr
= ramblock_ptr(block
, offset
);
1625 if (block
->flags
& RAM_PREALLOC
) {
1627 } else if (xen_enabled()) {
1631 if (block
->fd
>= 0) {
1632 flags
|= (block
->flags
& RAM_SHARED
?
1633 MAP_SHARED
: MAP_PRIVATE
);
1634 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1635 flags
, block
->fd
, offset
);
1638 * Remap needs to match alloc. Accelerators that
1639 * set phys_mem_alloc never remap. If they did,
1640 * we'd need a remap hook here.
1642 assert(phys_mem_alloc
== qemu_anon_ram_alloc
);
1644 flags
|= MAP_PRIVATE
| MAP_ANONYMOUS
;
1645 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1648 if (area
!= vaddr
) {
1649 fprintf(stderr
, "Could not remap addr: "
1650 RAM_ADDR_FMT
"@" RAM_ADDR_FMT
"\n",
1654 memory_try_enable_merging(vaddr
, length
);
1655 qemu_ram_setup_dump(vaddr
, length
);
1660 #endif /* !_WIN32 */
1662 int qemu_get_ram_fd(ram_addr_t addr
)
1668 block
= qemu_get_ram_block(addr
);
1674 void *qemu_get_ram_block_host_ptr(ram_addr_t addr
)
1680 block
= qemu_get_ram_block(addr
);
1681 ptr
= ramblock_ptr(block
, 0);
1686 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1687 * This should not be used for general purpose DMA. Use address_space_map
1688 * or address_space_rw instead. For local memory (e.g. video ram) that the
1689 * device owns, use memory_region_get_ram_ptr.
1691 * By the time this function returns, the returned pointer is not protected
1692 * by RCU anymore. If the caller is not within an RCU critical section and
1693 * does not hold the iothread lock, it must have other means of protecting the
1694 * pointer, such as a reference to the region that includes the incoming
1697 void *qemu_get_ram_ptr(ram_addr_t addr
)
1703 block
= qemu_get_ram_block(addr
);
1705 if (xen_enabled() && block
->host
== NULL
) {
1706 /* We need to check if the requested address is in the RAM
1707 * because we don't want to map the entire memory in QEMU.
1708 * In that case just map until the end of the page.
1710 if (block
->offset
== 0) {
1711 ptr
= xen_map_cache(addr
, 0, 0);
1715 block
->host
= xen_map_cache(block
->offset
, block
->max_length
, 1);
1717 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1724 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1725 * but takes a size argument.
1727 * By the time this function returns, the returned pointer is not protected
1728 * by RCU anymore. If the caller is not within an RCU critical section and
1729 * does not hold the iothread lock, it must have other means of protecting the
1730 * pointer, such as a reference to the region that includes the incoming
1733 static void *qemu_ram_ptr_length(ram_addr_t addr
, hwaddr
*size
)
1739 if (xen_enabled()) {
1740 return xen_map_cache(addr
, *size
, 1);
1744 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1745 if (addr
- block
->offset
< block
->max_length
) {
1746 if (addr
- block
->offset
+ *size
> block
->max_length
)
1747 *size
= block
->max_length
- addr
+ block
->offset
;
1748 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1754 fprintf(stderr
, "Bad ram offset %" PRIx64
"\n", (uint64_t)addr
);
1759 /* Some of the softmmu routines need to translate from a host pointer
1760 * (typically a TLB entry) back to a ram offset.
1762 * By the time this function returns, the returned pointer is not protected
1763 * by RCU anymore. If the caller is not within an RCU critical section and
1764 * does not hold the iothread lock, it must have other means of protecting the
1765 * pointer, such as a reference to the region that includes the incoming
1768 MemoryRegion
*qemu_ram_addr_from_host(void *ptr
, ram_addr_t
*ram_addr
)
1771 uint8_t *host
= ptr
;
1774 if (xen_enabled()) {
1776 *ram_addr
= xen_ram_addr_from_mapcache(ptr
);
1777 mr
= qemu_get_ram_block(*ram_addr
)->mr
;
1783 block
= atomic_rcu_read(&ram_list
.mru_block
);
1784 if (block
&& block
->host
&& host
- block
->host
< block
->max_length
) {
1788 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1789 /* This case append when the block is not mapped. */
1790 if (block
->host
== NULL
) {
1793 if (host
- block
->host
< block
->max_length
) {
1802 *ram_addr
= block
->offset
+ (host
- block
->host
);
1808 static void notdirty_mem_write(void *opaque
, hwaddr ram_addr
,
1809 uint64_t val
, unsigned size
)
1811 if (!cpu_physical_memory_get_dirty_flag(ram_addr
, DIRTY_MEMORY_CODE
)) {
1812 tb_invalidate_phys_page_fast(ram_addr
, size
);
1816 stb_p(qemu_get_ram_ptr(ram_addr
), val
);
1819 stw_p(qemu_get_ram_ptr(ram_addr
), val
);
1822 stl_p(qemu_get_ram_ptr(ram_addr
), val
);
1827 cpu_physical_memory_set_dirty_range_nocode(ram_addr
, size
);
1828 /* we remove the notdirty callback only if the code has been
1830 if (!cpu_physical_memory_is_clean(ram_addr
)) {
1831 CPUArchState
*env
= current_cpu
->env_ptr
;
1832 tlb_set_dirty(env
, current_cpu
->mem_io_vaddr
);
1836 static bool notdirty_mem_accepts(void *opaque
, hwaddr addr
,
1837 unsigned size
, bool is_write
)
1842 static const MemoryRegionOps notdirty_mem_ops
= {
1843 .write
= notdirty_mem_write
,
1844 .valid
.accepts
= notdirty_mem_accepts
,
1845 .endianness
= DEVICE_NATIVE_ENDIAN
,
1848 /* Generate a debug exception if a watchpoint has been hit. */
1849 static void check_watchpoint(int offset
, int len
, MemTxAttrs attrs
, int flags
)
1851 CPUState
*cpu
= current_cpu
;
1852 CPUArchState
*env
= cpu
->env_ptr
;
1853 target_ulong pc
, cs_base
;
1858 if (cpu
->watchpoint_hit
) {
1859 /* We re-entered the check after replacing the TB. Now raise
1860 * the debug interrupt so that is will trigger after the
1861 * current instruction. */
1862 cpu_interrupt(cpu
, CPU_INTERRUPT_DEBUG
);
1865 vaddr
= (cpu
->mem_io_vaddr
& TARGET_PAGE_MASK
) + offset
;
1866 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
1867 if (cpu_watchpoint_address_matches(wp
, vaddr
, len
)
1868 && (wp
->flags
& flags
)) {
1869 if (flags
== BP_MEM_READ
) {
1870 wp
->flags
|= BP_WATCHPOINT_HIT_READ
;
1872 wp
->flags
|= BP_WATCHPOINT_HIT_WRITE
;
1874 wp
->hitaddr
= vaddr
;
1875 wp
->hitattrs
= attrs
;
1876 if (!cpu
->watchpoint_hit
) {
1877 cpu
->watchpoint_hit
= wp
;
1878 tb_check_watchpoint(cpu
);
1879 if (wp
->flags
& BP_STOP_BEFORE_ACCESS
) {
1880 cpu
->exception_index
= EXCP_DEBUG
;
1883 cpu_get_tb_cpu_state(env
, &pc
, &cs_base
, &cpu_flags
);
1884 tb_gen_code(cpu
, pc
, cs_base
, cpu_flags
, 1);
1885 cpu_resume_from_signal(cpu
, NULL
);
1889 wp
->flags
&= ~BP_WATCHPOINT_HIT
;
1894 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1895 so these check for a hit then pass through to the normal out-of-line
1897 static MemTxResult
watch_mem_read(void *opaque
, hwaddr addr
, uint64_t *pdata
,
1898 unsigned size
, MemTxAttrs attrs
)
1903 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, attrs
, BP_MEM_READ
);
1906 data
= address_space_ldub(&address_space_memory
, addr
, attrs
, &res
);
1909 data
= address_space_lduw(&address_space_memory
, addr
, attrs
, &res
);
1912 data
= address_space_ldl(&address_space_memory
, addr
, attrs
, &res
);
1920 static MemTxResult
watch_mem_write(void *opaque
, hwaddr addr
,
1921 uint64_t val
, unsigned size
,
1926 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, attrs
, BP_MEM_WRITE
);
1929 address_space_stb(&address_space_memory
, addr
, val
, attrs
, &res
);
1932 address_space_stw(&address_space_memory
, addr
, val
, attrs
, &res
);
1935 address_space_stl(&address_space_memory
, addr
, val
, attrs
, &res
);
1942 static const MemoryRegionOps watch_mem_ops
= {
1943 .read_with_attrs
= watch_mem_read
,
1944 .write_with_attrs
= watch_mem_write
,
1945 .endianness
= DEVICE_NATIVE_ENDIAN
,
1948 static MemTxResult
subpage_read(void *opaque
, hwaddr addr
, uint64_t *data
,
1949 unsigned len
, MemTxAttrs attrs
)
1951 subpage_t
*subpage
= opaque
;
1955 #if defined(DEBUG_SUBPAGE)
1956 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
"\n", __func__
,
1957 subpage
, len
, addr
);
1959 res
= address_space_read(subpage
->as
, addr
+ subpage
->base
,
1966 *data
= ldub_p(buf
);
1969 *data
= lduw_p(buf
);
1982 static MemTxResult
subpage_write(void *opaque
, hwaddr addr
,
1983 uint64_t value
, unsigned len
, MemTxAttrs attrs
)
1985 subpage_t
*subpage
= opaque
;
1988 #if defined(DEBUG_SUBPAGE)
1989 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1990 " value %"PRIx64
"\n",
1991 __func__
, subpage
, len
, addr
, value
);
2009 return address_space_write(subpage
->as
, addr
+ subpage
->base
,
2013 static bool subpage_accepts(void *opaque
, hwaddr addr
,
2014 unsigned len
, bool is_write
)
2016 subpage_t
*subpage
= opaque
;
2017 #if defined(DEBUG_SUBPAGE)
2018 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx
"\n",
2019 __func__
, subpage
, is_write
? 'w' : 'r', len
, addr
);
2022 return address_space_access_valid(subpage
->as
, addr
+ subpage
->base
,
2026 static const MemoryRegionOps subpage_ops
= {
2027 .read_with_attrs
= subpage_read
,
2028 .write_with_attrs
= subpage_write
,
2029 .impl
.min_access_size
= 1,
2030 .impl
.max_access_size
= 8,
2031 .valid
.min_access_size
= 1,
2032 .valid
.max_access_size
= 8,
2033 .valid
.accepts
= subpage_accepts
,
2034 .endianness
= DEVICE_NATIVE_ENDIAN
,
2037 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
2042 if (start
>= TARGET_PAGE_SIZE
|| end
>= TARGET_PAGE_SIZE
)
2044 idx
= SUBPAGE_IDX(start
);
2045 eidx
= SUBPAGE_IDX(end
);
2046 #if defined(DEBUG_SUBPAGE)
2047 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2048 __func__
, mmio
, start
, end
, idx
, eidx
, section
);
2050 for (; idx
<= eidx
; idx
++) {
2051 mmio
->sub_section
[idx
] = section
;
2057 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
)
2061 mmio
= g_malloc0(sizeof(subpage_t
));
2065 memory_region_init_io(&mmio
->iomem
, NULL
, &subpage_ops
, mmio
,
2066 NULL
, TARGET_PAGE_SIZE
);
2067 mmio
->iomem
.subpage
= true;
2068 #if defined(DEBUG_SUBPAGE)
2069 printf("%s: %p base " TARGET_FMT_plx
" len %08x\n", __func__
,
2070 mmio
, base
, TARGET_PAGE_SIZE
);
2072 subpage_register(mmio
, 0, TARGET_PAGE_SIZE
-1, PHYS_SECTION_UNASSIGNED
);
2077 static uint16_t dummy_section(PhysPageMap
*map
, AddressSpace
*as
,
2081 MemoryRegionSection section
= {
2082 .address_space
= as
,
2084 .offset_within_address_space
= 0,
2085 .offset_within_region
= 0,
2086 .size
= int128_2_64(),
2089 return phys_section_add(map
, §ion
);
2092 MemoryRegion
*iotlb_to_region(CPUState
*cpu
, hwaddr index
)
2094 AddressSpaceDispatch
*d
= atomic_rcu_read(&cpu
->memory_dispatch
);
2095 MemoryRegionSection
*sections
= d
->map
.sections
;
2097 return sections
[index
& ~TARGET_PAGE_MASK
].mr
;
2100 static void io_mem_init(void)
2102 memory_region_init_io(&io_mem_rom
, NULL
, &unassigned_mem_ops
, NULL
, NULL
, UINT64_MAX
);
2103 memory_region_init_io(&io_mem_unassigned
, NULL
, &unassigned_mem_ops
, NULL
,
2105 memory_region_init_io(&io_mem_notdirty
, NULL
, ¬dirty_mem_ops
, NULL
,
2107 memory_region_init_io(&io_mem_watch
, NULL
, &watch_mem_ops
, NULL
,
2111 static void mem_begin(MemoryListener
*listener
)
2113 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2114 AddressSpaceDispatch
*d
= g_new0(AddressSpaceDispatch
, 1);
2117 n
= dummy_section(&d
->map
, as
, &io_mem_unassigned
);
2118 assert(n
== PHYS_SECTION_UNASSIGNED
);
2119 n
= dummy_section(&d
->map
, as
, &io_mem_notdirty
);
2120 assert(n
== PHYS_SECTION_NOTDIRTY
);
2121 n
= dummy_section(&d
->map
, as
, &io_mem_rom
);
2122 assert(n
== PHYS_SECTION_ROM
);
2123 n
= dummy_section(&d
->map
, as
, &io_mem_watch
);
2124 assert(n
== PHYS_SECTION_WATCH
);
2126 d
->phys_map
= (PhysPageEntry
) { .ptr
= PHYS_MAP_NODE_NIL
, .skip
= 1 };
2128 as
->next_dispatch
= d
;
2131 static void address_space_dispatch_free(AddressSpaceDispatch
*d
)
2133 phys_sections_free(&d
->map
);
2137 static void mem_commit(MemoryListener
*listener
)
2139 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2140 AddressSpaceDispatch
*cur
= as
->dispatch
;
2141 AddressSpaceDispatch
*next
= as
->next_dispatch
;
2143 phys_page_compact_all(next
, next
->map
.nodes_nb
);
2145 atomic_rcu_set(&as
->dispatch
, next
);
2147 call_rcu(cur
, address_space_dispatch_free
, rcu
);
2151 static void tcg_commit(MemoryListener
*listener
)
2155 /* since each CPU stores ram addresses in its TLB cache, we must
2156 reset the modified entries */
2159 /* FIXME: Disentangle the cpu.h circular files deps so we can
2160 directly get the right CPU from listener. */
2161 if (cpu
->tcg_as_listener
!= listener
) {
2164 cpu_reload_memory_map(cpu
);
2168 static void core_log_global_start(MemoryListener
*listener
)
2170 cpu_physical_memory_set_dirty_tracking(true);
2173 static void core_log_global_stop(MemoryListener
*listener
)
2175 cpu_physical_memory_set_dirty_tracking(false);
2178 static MemoryListener core_memory_listener
= {
2179 .log_global_start
= core_log_global_start
,
2180 .log_global_stop
= core_log_global_stop
,
2184 void address_space_init_dispatch(AddressSpace
*as
)
2186 as
->dispatch
= NULL
;
2187 as
->dispatch_listener
= (MemoryListener
) {
2189 .commit
= mem_commit
,
2190 .region_add
= mem_add
,
2191 .region_nop
= mem_add
,
2194 memory_listener_register(&as
->dispatch_listener
, as
);
2197 void address_space_unregister(AddressSpace
*as
)
2199 memory_listener_unregister(&as
->dispatch_listener
);
2202 void address_space_destroy_dispatch(AddressSpace
*as
)
2204 AddressSpaceDispatch
*d
= as
->dispatch
;
2206 atomic_rcu_set(&as
->dispatch
, NULL
);
2208 call_rcu(d
, address_space_dispatch_free
, rcu
);
2212 static void memory_map_init(void)
2214 system_memory
= g_malloc(sizeof(*system_memory
));
2216 memory_region_init(system_memory
, NULL
, "system", UINT64_MAX
);
2217 address_space_init(&address_space_memory
, system_memory
, "memory");
2219 system_io
= g_malloc(sizeof(*system_io
));
2220 memory_region_init_io(system_io
, NULL
, &unassigned_io_ops
, NULL
, "io",
2222 address_space_init(&address_space_io
, system_io
, "I/O");
2224 memory_listener_register(&core_memory_listener
, &address_space_memory
);
2227 MemoryRegion
*get_system_memory(void)
2229 return system_memory
;
2232 MemoryRegion
*get_system_io(void)
2237 #endif /* !defined(CONFIG_USER_ONLY) */
2239 /* physical memory access (slow version, mainly for debug) */
2240 #if defined(CONFIG_USER_ONLY)
2241 int cpu_memory_rw_debug(CPUState
*cpu
, target_ulong addr
,
2242 uint8_t *buf
, int len
, int is_write
)
2249 page
= addr
& TARGET_PAGE_MASK
;
2250 l
= (page
+ TARGET_PAGE_SIZE
) - addr
;
2253 flags
= page_get_flags(page
);
2254 if (!(flags
& PAGE_VALID
))
2257 if (!(flags
& PAGE_WRITE
))
2259 /* XXX: this code should not depend on lock_user */
2260 if (!(p
= lock_user(VERIFY_WRITE
, addr
, l
, 0)))
2263 unlock_user(p
, addr
, l
);
2265 if (!(flags
& PAGE_READ
))
2267 /* XXX: this code should not depend on lock_user */
2268 if (!(p
= lock_user(VERIFY_READ
, addr
, l
, 1)))
2271 unlock_user(p
, addr
, 0);
2282 static void invalidate_and_set_dirty(hwaddr addr
,
2285 if (cpu_physical_memory_range_includes_clean(addr
, length
)) {
2286 tb_invalidate_phys_range(addr
, addr
+ length
, 0);
2287 cpu_physical_memory_set_dirty_range_nocode(addr
, length
);
2289 xen_modified_memory(addr
, length
);
2292 static int memory_access_size(MemoryRegion
*mr
, unsigned l
, hwaddr addr
)
2294 unsigned access_size_max
= mr
->ops
->valid
.max_access_size
;
2296 /* Regions are assumed to support 1-4 byte accesses unless
2297 otherwise specified. */
2298 if (access_size_max
== 0) {
2299 access_size_max
= 4;
2302 /* Bound the maximum access by the alignment of the address. */
2303 if (!mr
->ops
->impl
.unaligned
) {
2304 unsigned align_size_max
= addr
& -addr
;
2305 if (align_size_max
!= 0 && align_size_max
< access_size_max
) {
2306 access_size_max
= align_size_max
;
2310 /* Don't attempt accesses larger than the maximum. */
2311 if (l
> access_size_max
) {
2312 l
= access_size_max
;
2315 l
= 1 << (qemu_fls(l
) - 1);
2321 MemTxResult
address_space_rw(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2322 uint8_t *buf
, int len
, bool is_write
)
2329 MemTxResult result
= MEMTX_OK
;
2334 mr
= address_space_translate(as
, addr
, &addr1
, &l
, is_write
);
2337 if (!memory_access_is_direct(mr
, is_write
)) {
2338 l
= memory_access_size(mr
, l
, addr1
);
2339 /* XXX: could force current_cpu to NULL to avoid
2343 /* 64 bit write access */
2345 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 8,
2349 /* 32 bit write access */
2351 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 4,
2355 /* 16 bit write access */
2357 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 2,
2361 /* 8 bit write access */
2363 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 1,
2370 addr1
+= memory_region_get_ram_addr(mr
);
2372 ptr
= qemu_get_ram_ptr(addr1
);
2373 memcpy(ptr
, buf
, l
);
2374 invalidate_and_set_dirty(addr1
, l
);
2377 if (!memory_access_is_direct(mr
, is_write
)) {
2379 l
= memory_access_size(mr
, l
, addr1
);
2382 /* 64 bit read access */
2383 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 8,
2388 /* 32 bit read access */
2389 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 4,
2394 /* 16 bit read access */
2395 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 2,
2400 /* 8 bit read access */
2401 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 1,
2410 ptr
= qemu_get_ram_ptr(mr
->ram_addr
+ addr1
);
2411 memcpy(buf
, ptr
, l
);
2423 MemTxResult
address_space_write(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2424 const uint8_t *buf
, int len
)
2426 return address_space_rw(as
, addr
, attrs
, (uint8_t *)buf
, len
, true);
2429 MemTxResult
address_space_read(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2430 uint8_t *buf
, int len
)
2432 return address_space_rw(as
, addr
, attrs
, buf
, len
, false);
2436 void cpu_physical_memory_rw(hwaddr addr
, uint8_t *buf
,
2437 int len
, int is_write
)
2439 address_space_rw(&address_space_memory
, addr
, MEMTXATTRS_UNSPECIFIED
,
2440 buf
, len
, is_write
);
2443 enum write_rom_type
{
2448 static inline void cpu_physical_memory_write_rom_internal(AddressSpace
*as
,
2449 hwaddr addr
, const uint8_t *buf
, int len
, enum write_rom_type type
)
2459 mr
= address_space_translate(as
, addr
, &addr1
, &l
, true);
2461 if (!(memory_region_is_ram(mr
) ||
2462 memory_region_is_romd(mr
))) {
2465 addr1
+= memory_region_get_ram_addr(mr
);
2467 ptr
= qemu_get_ram_ptr(addr1
);
2470 memcpy(ptr
, buf
, l
);
2471 invalidate_and_set_dirty(addr1
, l
);
2474 flush_icache_range((uintptr_t)ptr
, (uintptr_t)ptr
+ l
);
2485 /* used for ROM loading : can write in RAM and ROM */
2486 void cpu_physical_memory_write_rom(AddressSpace
*as
, hwaddr addr
,
2487 const uint8_t *buf
, int len
)
2489 cpu_physical_memory_write_rom_internal(as
, addr
, buf
, len
, WRITE_DATA
);
2492 void cpu_flush_icache_range(hwaddr start
, int len
)
2495 * This function should do the same thing as an icache flush that was
2496 * triggered from within the guest. For TCG we are always cache coherent,
2497 * so there is no need to flush anything. For KVM / Xen we need to flush
2498 * the host's instruction cache at least.
2500 if (tcg_enabled()) {
2504 cpu_physical_memory_write_rom_internal(&address_space_memory
,
2505 start
, NULL
, len
, FLUSH_CACHE
);
2516 static BounceBuffer bounce
;
2518 typedef struct MapClient
{
2520 QLIST_ENTRY(MapClient
) link
;
2523 QemuMutex map_client_list_lock
;
2524 static QLIST_HEAD(map_client_list
, MapClient
) map_client_list
2525 = QLIST_HEAD_INITIALIZER(map_client_list
);
2527 static void cpu_unregister_map_client_do(MapClient
*client
)
2529 QLIST_REMOVE(client
, link
);
2533 static void cpu_notify_map_clients_locked(void)
2537 while (!QLIST_EMPTY(&map_client_list
)) {
2538 client
= QLIST_FIRST(&map_client_list
);
2539 qemu_bh_schedule(client
->bh
);
2540 cpu_unregister_map_client_do(client
);
2544 void cpu_register_map_client(QEMUBH
*bh
)
2546 MapClient
*client
= g_malloc(sizeof(*client
));
2548 qemu_mutex_lock(&map_client_list_lock
);
2550 QLIST_INSERT_HEAD(&map_client_list
, client
, link
);
2551 if (!atomic_read(&bounce
.in_use
)) {
2552 cpu_notify_map_clients_locked();
2554 qemu_mutex_unlock(&map_client_list_lock
);
2557 void cpu_exec_init_all(void)
2559 qemu_mutex_init(&ram_list
.mutex
);
2562 qemu_mutex_init(&map_client_list_lock
);
2565 void cpu_unregister_map_client(QEMUBH
*bh
)
2569 qemu_mutex_lock(&map_client_list_lock
);
2570 QLIST_FOREACH(client
, &map_client_list
, link
) {
2571 if (client
->bh
== bh
) {
2572 cpu_unregister_map_client_do(client
);
2576 qemu_mutex_unlock(&map_client_list_lock
);
2579 static void cpu_notify_map_clients(void)
2581 qemu_mutex_lock(&map_client_list_lock
);
2582 cpu_notify_map_clients_locked();
2583 qemu_mutex_unlock(&map_client_list_lock
);
2586 bool address_space_access_valid(AddressSpace
*as
, hwaddr addr
, int len
, bool is_write
)
2594 mr
= address_space_translate(as
, addr
, &xlat
, &l
, is_write
);
2595 if (!memory_access_is_direct(mr
, is_write
)) {
2596 l
= memory_access_size(mr
, l
, addr
);
2597 if (!memory_region_access_valid(mr
, xlat
, l
, is_write
)) {
2609 /* Map a physical memory region into a host virtual address.
2610 * May map a subset of the requested range, given by and returned in *plen.
2611 * May return NULL if resources needed to perform the mapping are exhausted.
2612 * Use only for reads OR writes - not for read-modify-write operations.
2613 * Use cpu_register_map_client() to know when retrying the map operation is
2614 * likely to succeed.
2616 void *address_space_map(AddressSpace
*as
,
2623 hwaddr l
, xlat
, base
;
2624 MemoryRegion
*mr
, *this_mr
;
2633 mr
= address_space_translate(as
, addr
, &xlat
, &l
, is_write
);
2635 if (!memory_access_is_direct(mr
, is_write
)) {
2636 if (atomic_xchg(&bounce
.in_use
, true)) {
2640 /* Avoid unbounded allocations */
2641 l
= MIN(l
, TARGET_PAGE_SIZE
);
2642 bounce
.buffer
= qemu_memalign(TARGET_PAGE_SIZE
, l
);
2646 memory_region_ref(mr
);
2649 address_space_read(as
, addr
, MEMTXATTRS_UNSPECIFIED
,
2655 return bounce
.buffer
;
2659 raddr
= memory_region_get_ram_addr(mr
);
2670 this_mr
= address_space_translate(as
, addr
, &xlat
, &l
, is_write
);
2671 if (this_mr
!= mr
|| xlat
!= base
+ done
) {
2676 memory_region_ref(mr
);
2679 return qemu_ram_ptr_length(raddr
+ base
, plen
);
2682 /* Unmaps a memory region previously mapped by address_space_map().
2683 * Will also mark the memory as dirty if is_write == 1. access_len gives
2684 * the amount of memory that was actually read or written by the caller.
2686 void address_space_unmap(AddressSpace
*as
, void *buffer
, hwaddr len
,
2687 int is_write
, hwaddr access_len
)
2689 if (buffer
!= bounce
.buffer
) {
2693 mr
= qemu_ram_addr_from_host(buffer
, &addr1
);
2696 invalidate_and_set_dirty(addr1
, access_len
);
2698 if (xen_enabled()) {
2699 xen_invalidate_map_cache_entry(buffer
);
2701 memory_region_unref(mr
);
2705 address_space_write(as
, bounce
.addr
, MEMTXATTRS_UNSPECIFIED
,
2706 bounce
.buffer
, access_len
);
2708 qemu_vfree(bounce
.buffer
);
2709 bounce
.buffer
= NULL
;
2710 memory_region_unref(bounce
.mr
);
2711 atomic_mb_set(&bounce
.in_use
, false);
2712 cpu_notify_map_clients();
2715 void *cpu_physical_memory_map(hwaddr addr
,
2719 return address_space_map(&address_space_memory
, addr
, plen
, is_write
);
2722 void cpu_physical_memory_unmap(void *buffer
, hwaddr len
,
2723 int is_write
, hwaddr access_len
)
2725 return address_space_unmap(&address_space_memory
, buffer
, len
, is_write
, access_len
);
2728 /* warning: addr must be aligned */
2729 static inline uint32_t address_space_ldl_internal(AddressSpace
*as
, hwaddr addr
,
2731 MemTxResult
*result
,
2732 enum device_endian endian
)
2742 mr
= address_space_translate(as
, addr
, &addr1
, &l
, false);
2743 if (l
< 4 || !memory_access_is_direct(mr
, false)) {
2745 r
= memory_region_dispatch_read(mr
, addr1
, &val
, 4, attrs
);
2746 #if defined(TARGET_WORDS_BIGENDIAN)
2747 if (endian
== DEVICE_LITTLE_ENDIAN
) {
2751 if (endian
== DEVICE_BIG_ENDIAN
) {
2757 ptr
= qemu_get_ram_ptr((memory_region_get_ram_addr(mr
)
2761 case DEVICE_LITTLE_ENDIAN
:
2762 val
= ldl_le_p(ptr
);
2764 case DEVICE_BIG_ENDIAN
:
2765 val
= ldl_be_p(ptr
);
2780 uint32_t address_space_ldl(AddressSpace
*as
, hwaddr addr
,
2781 MemTxAttrs attrs
, MemTxResult
*result
)
2783 return address_space_ldl_internal(as
, addr
, attrs
, result
,
2784 DEVICE_NATIVE_ENDIAN
);
2787 uint32_t address_space_ldl_le(AddressSpace
*as
, hwaddr addr
,
2788 MemTxAttrs attrs
, MemTxResult
*result
)
2790 return address_space_ldl_internal(as
, addr
, attrs
, result
,
2791 DEVICE_LITTLE_ENDIAN
);
2794 uint32_t address_space_ldl_be(AddressSpace
*as
, hwaddr addr
,
2795 MemTxAttrs attrs
, MemTxResult
*result
)
2797 return address_space_ldl_internal(as
, addr
, attrs
, result
,
2801 uint32_t ldl_phys(AddressSpace
*as
, hwaddr addr
)
2803 return address_space_ldl(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2806 uint32_t ldl_le_phys(AddressSpace
*as
, hwaddr addr
)
2808 return address_space_ldl_le(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2811 uint32_t ldl_be_phys(AddressSpace
*as
, hwaddr addr
)
2813 return address_space_ldl_be(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2816 /* warning: addr must be aligned */
2817 static inline uint64_t address_space_ldq_internal(AddressSpace
*as
, hwaddr addr
,
2819 MemTxResult
*result
,
2820 enum device_endian endian
)
2830 mr
= address_space_translate(as
, addr
, &addr1
, &l
,
2832 if (l
< 8 || !memory_access_is_direct(mr
, false)) {
2834 r
= memory_region_dispatch_read(mr
, addr1
, &val
, 8, attrs
);
2835 #if defined(TARGET_WORDS_BIGENDIAN)
2836 if (endian
== DEVICE_LITTLE_ENDIAN
) {
2840 if (endian
== DEVICE_BIG_ENDIAN
) {
2846 ptr
= qemu_get_ram_ptr((memory_region_get_ram_addr(mr
)
2850 case DEVICE_LITTLE_ENDIAN
:
2851 val
= ldq_le_p(ptr
);
2853 case DEVICE_BIG_ENDIAN
:
2854 val
= ldq_be_p(ptr
);
2869 uint64_t address_space_ldq(AddressSpace
*as
, hwaddr addr
,
2870 MemTxAttrs attrs
, MemTxResult
*result
)
2872 return address_space_ldq_internal(as
, addr
, attrs
, result
,
2873 DEVICE_NATIVE_ENDIAN
);
2876 uint64_t address_space_ldq_le(AddressSpace
*as
, hwaddr addr
,
2877 MemTxAttrs attrs
, MemTxResult
*result
)
2879 return address_space_ldq_internal(as
, addr
, attrs
, result
,
2880 DEVICE_LITTLE_ENDIAN
);
2883 uint64_t address_space_ldq_be(AddressSpace
*as
, hwaddr addr
,
2884 MemTxAttrs attrs
, MemTxResult
*result
)
2886 return address_space_ldq_internal(as
, addr
, attrs
, result
,
2890 uint64_t ldq_phys(AddressSpace
*as
, hwaddr addr
)
2892 return address_space_ldq(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2895 uint64_t ldq_le_phys(AddressSpace
*as
, hwaddr addr
)
2897 return address_space_ldq_le(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2900 uint64_t ldq_be_phys(AddressSpace
*as
, hwaddr addr
)
2902 return address_space_ldq_be(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2906 uint32_t address_space_ldub(AddressSpace
*as
, hwaddr addr
,
2907 MemTxAttrs attrs
, MemTxResult
*result
)
2912 r
= address_space_rw(as
, addr
, attrs
, &val
, 1, 0);
2919 uint32_t ldub_phys(AddressSpace
*as
, hwaddr addr
)
2921 return address_space_ldub(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2924 /* warning: addr must be aligned */
2925 static inline uint32_t address_space_lduw_internal(AddressSpace
*as
,
2928 MemTxResult
*result
,
2929 enum device_endian endian
)
2939 mr
= address_space_translate(as
, addr
, &addr1
, &l
,
2941 if (l
< 2 || !memory_access_is_direct(mr
, false)) {
2943 r
= memory_region_dispatch_read(mr
, addr1
, &val
, 2, attrs
);
2944 #if defined(TARGET_WORDS_BIGENDIAN)
2945 if (endian
== DEVICE_LITTLE_ENDIAN
) {
2949 if (endian
== DEVICE_BIG_ENDIAN
) {
2955 ptr
= qemu_get_ram_ptr((memory_region_get_ram_addr(mr
)
2959 case DEVICE_LITTLE_ENDIAN
:
2960 val
= lduw_le_p(ptr
);
2962 case DEVICE_BIG_ENDIAN
:
2963 val
= lduw_be_p(ptr
);
2978 uint32_t address_space_lduw(AddressSpace
*as
, hwaddr addr
,
2979 MemTxAttrs attrs
, MemTxResult
*result
)
2981 return address_space_lduw_internal(as
, addr
, attrs
, result
,
2982 DEVICE_NATIVE_ENDIAN
);
2985 uint32_t address_space_lduw_le(AddressSpace
*as
, hwaddr addr
,
2986 MemTxAttrs attrs
, MemTxResult
*result
)
2988 return address_space_lduw_internal(as
, addr
, attrs
, result
,
2989 DEVICE_LITTLE_ENDIAN
);
2992 uint32_t address_space_lduw_be(AddressSpace
*as
, hwaddr addr
,
2993 MemTxAttrs attrs
, MemTxResult
*result
)
2995 return address_space_lduw_internal(as
, addr
, attrs
, result
,
2999 uint32_t lduw_phys(AddressSpace
*as
, hwaddr addr
)
3001 return address_space_lduw(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3004 uint32_t lduw_le_phys(AddressSpace
*as
, hwaddr addr
)
3006 return address_space_lduw_le(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3009 uint32_t lduw_be_phys(AddressSpace
*as
, hwaddr addr
)
3011 return address_space_lduw_be(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3014 /* warning: addr must be aligned. The ram page is not masked as dirty
3015 and the code inside is not invalidated. It is useful if the dirty
3016 bits are used to track modified PTEs */
3017 void address_space_stl_notdirty(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3018 MemTxAttrs attrs
, MemTxResult
*result
)
3027 mr
= address_space_translate(as
, addr
, &addr1
, &l
,
3029 if (l
< 4 || !memory_access_is_direct(mr
, true)) {
3030 r
= memory_region_dispatch_write(mr
, addr1
, val
, 4, attrs
);
3032 addr1
+= memory_region_get_ram_addr(mr
) & TARGET_PAGE_MASK
;
3033 ptr
= qemu_get_ram_ptr(addr1
);
3036 if (unlikely(in_migration
)) {
3037 if (cpu_physical_memory_is_clean(addr1
)) {
3038 /* invalidate code */
3039 tb_invalidate_phys_page_range(addr1
, addr1
+ 4, 0);
3041 cpu_physical_memory_set_dirty_range_nocode(addr1
, 4);
3052 void stl_phys_notdirty(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3054 address_space_stl_notdirty(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3057 /* warning: addr must be aligned */
3058 static inline void address_space_stl_internal(AddressSpace
*as
,
3059 hwaddr addr
, uint32_t val
,
3061 MemTxResult
*result
,
3062 enum device_endian endian
)
3071 mr
= address_space_translate(as
, addr
, &addr1
, &l
,
3073 if (l
< 4 || !memory_access_is_direct(mr
, true)) {
3074 #if defined(TARGET_WORDS_BIGENDIAN)
3075 if (endian
== DEVICE_LITTLE_ENDIAN
) {
3079 if (endian
== DEVICE_BIG_ENDIAN
) {
3083 r
= memory_region_dispatch_write(mr
, addr1
, val
, 4, attrs
);
3086 addr1
+= memory_region_get_ram_addr(mr
) & TARGET_PAGE_MASK
;
3087 ptr
= qemu_get_ram_ptr(addr1
);
3089 case DEVICE_LITTLE_ENDIAN
:
3092 case DEVICE_BIG_ENDIAN
:
3099 invalidate_and_set_dirty(addr1
, 4);
3108 void address_space_stl(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3109 MemTxAttrs attrs
, MemTxResult
*result
)
3111 address_space_stl_internal(as
, addr
, val
, attrs
, result
,
3112 DEVICE_NATIVE_ENDIAN
);
3115 void address_space_stl_le(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3116 MemTxAttrs attrs
, MemTxResult
*result
)
3118 address_space_stl_internal(as
, addr
, val
, attrs
, result
,
3119 DEVICE_LITTLE_ENDIAN
);
3122 void address_space_stl_be(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3123 MemTxAttrs attrs
, MemTxResult
*result
)
3125 address_space_stl_internal(as
, addr
, val
, attrs
, result
,
3129 void stl_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3131 address_space_stl(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3134 void stl_le_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3136 address_space_stl_le(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3139 void stl_be_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3141 address_space_stl_be(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3145 void address_space_stb(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3146 MemTxAttrs attrs
, MemTxResult
*result
)
3151 r
= address_space_rw(as
, addr
, attrs
, &v
, 1, 1);
3157 void stb_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3159 address_space_stb(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3162 /* warning: addr must be aligned */
3163 static inline void address_space_stw_internal(AddressSpace
*as
,
3164 hwaddr addr
, uint32_t val
,
3166 MemTxResult
*result
,
3167 enum device_endian endian
)
3176 mr
= address_space_translate(as
, addr
, &addr1
, &l
, true);
3177 if (l
< 2 || !memory_access_is_direct(mr
, true)) {
3178 #if defined(TARGET_WORDS_BIGENDIAN)
3179 if (endian
== DEVICE_LITTLE_ENDIAN
) {
3183 if (endian
== DEVICE_BIG_ENDIAN
) {
3187 r
= memory_region_dispatch_write(mr
, addr1
, val
, 2, attrs
);
3190 addr1
+= memory_region_get_ram_addr(mr
) & TARGET_PAGE_MASK
;
3191 ptr
= qemu_get_ram_ptr(addr1
);
3193 case DEVICE_LITTLE_ENDIAN
:
3196 case DEVICE_BIG_ENDIAN
:
3203 invalidate_and_set_dirty(addr1
, 2);
3212 void address_space_stw(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3213 MemTxAttrs attrs
, MemTxResult
*result
)
3215 address_space_stw_internal(as
, addr
, val
, attrs
, result
,
3216 DEVICE_NATIVE_ENDIAN
);
3219 void address_space_stw_le(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3220 MemTxAttrs attrs
, MemTxResult
*result
)
3222 address_space_stw_internal(as
, addr
, val
, attrs
, result
,
3223 DEVICE_LITTLE_ENDIAN
);
3226 void address_space_stw_be(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3227 MemTxAttrs attrs
, MemTxResult
*result
)
3229 address_space_stw_internal(as
, addr
, val
, attrs
, result
,
3233 void stw_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3235 address_space_stw(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3238 void stw_le_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3240 address_space_stw_le(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3243 void stw_be_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3245 address_space_stw_be(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3249 void address_space_stq(AddressSpace
*as
, hwaddr addr
, uint64_t val
,
3250 MemTxAttrs attrs
, MemTxResult
*result
)
3254 r
= address_space_rw(as
, addr
, attrs
, (void *) &val
, 8, 1);
3260 void address_space_stq_le(AddressSpace
*as
, hwaddr addr
, uint64_t val
,
3261 MemTxAttrs attrs
, MemTxResult
*result
)
3264 val
= cpu_to_le64(val
);
3265 r
= address_space_rw(as
, addr
, attrs
, (void *) &val
, 8, 1);
3270 void address_space_stq_be(AddressSpace
*as
, hwaddr addr
, uint64_t val
,
3271 MemTxAttrs attrs
, MemTxResult
*result
)
3274 val
= cpu_to_be64(val
);
3275 r
= address_space_rw(as
, addr
, attrs
, (void *) &val
, 8, 1);
3281 void stq_phys(AddressSpace
*as
, hwaddr addr
, uint64_t val
)
3283 address_space_stq(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3286 void stq_le_phys(AddressSpace
*as
, hwaddr addr
, uint64_t val
)
3288 address_space_stq_le(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3291 void stq_be_phys(AddressSpace
*as
, hwaddr addr
, uint64_t val
)
3293 address_space_stq_be(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3296 /* virtual memory access for debug (includes writing to ROM) */
3297 int cpu_memory_rw_debug(CPUState
*cpu
, target_ulong addr
,
3298 uint8_t *buf
, int len
, int is_write
)
3305 page
= addr
& TARGET_PAGE_MASK
;
3306 phys_addr
= cpu_get_phys_page_debug(cpu
, page
);
3307 /* if no physical page mapped, return an error */
3308 if (phys_addr
== -1)
3310 l
= (page
+ TARGET_PAGE_SIZE
) - addr
;
3313 phys_addr
+= (addr
& ~TARGET_PAGE_MASK
);
3315 cpu_physical_memory_write_rom(cpu
->as
, phys_addr
, buf
, l
);
3317 address_space_rw(cpu
->as
, phys_addr
, MEMTXATTRS_UNSPECIFIED
,
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
3342 #ifndef CONFIG_USER_ONLY
3343 bool cpu_physical_memory_is_io(hwaddr phys_addr
)
3350 mr
= address_space_translate(&address_space_memory
,
3351 phys_addr
, &phys_addr
, &l
, false);
3353 res
= !(memory_region_is_ram(mr
) || memory_region_is_romd(mr
));
3358 void qemu_ram_foreach_block(RAMBlockIterFunc func
, void *opaque
)
3363 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
3364 func(block
->host
, block
->offset
, block
->used_length
, opaque
);