4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include <sys/types.h>
25 #include "qemu-common.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "exec/cputlb.h"
52 #include "translate-all.h"
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
57 #include "qemu/range.h"
59 //#define DEBUG_SUBPAGE
61 #if !defined(CONFIG_USER_ONLY)
62 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
63 * are protected by the ramlist lock.
65 RAMList ram_list
= { .blocks
= QLIST_HEAD_INITIALIZER(ram_list
.blocks
) };
67 static MemoryRegion
*system_memory
;
68 static MemoryRegion
*system_io
;
70 AddressSpace address_space_io
;
71 AddressSpace address_space_memory
;
73 MemoryRegion io_mem_rom
, io_mem_notdirty
;
74 static MemoryRegion io_mem_unassigned
;
76 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
77 #define RAM_PREALLOC (1 << 0)
79 /* RAM is mmap-ed with MAP_SHARED */
80 #define RAM_SHARED (1 << 1)
82 /* Only a portion of RAM (used_length) is actually used, and migrated.
83 * This used_length size can change across reboots.
85 #define RAM_RESIZEABLE (1 << 2)
89 struct CPUTailQ cpus
= QTAILQ_HEAD_INITIALIZER(cpus
);
90 /* current CPU in the current thread. It is only valid inside
92 DEFINE_TLS(CPUState
*, current_cpu
);
93 /* 0 = Do not count executed instructions.
94 1 = Precise instruction counting.
95 2 = Adaptive rate instruction counting. */
98 #if !defined(CONFIG_USER_ONLY)
100 typedef struct PhysPageEntry PhysPageEntry
;
102 struct PhysPageEntry
{
103 /* How many bits to skip to reach the next level (in units of L2_SIZE). 0 for a leaf. */
105 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
109 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
111 /* Size of the L2 (and L3, etc) page tables. */
112 #define ADDR_SPACE_BITS 64
115 #define P_L2_SIZE (1 << P_L2_BITS)
117 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
119 typedef PhysPageEntry Node
[P_L2_SIZE
];
121 typedef struct PhysPageMap
{
124 unsigned sections_nb
;
125 unsigned sections_nb_alloc
;
127 unsigned nodes_nb_alloc
;
129 MemoryRegionSection
*sections
;
132 struct AddressSpaceDispatch
{
135 /* This is a multi-level map on the physical address space.
136 * The bottom level has pointers to MemoryRegionSections.
138 PhysPageEntry phys_map
;
143 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
144 typedef struct subpage_t
{
148 uint16_t sub_section
[TARGET_PAGE_SIZE
];
151 #define PHYS_SECTION_UNASSIGNED 0
152 #define PHYS_SECTION_NOTDIRTY 1
153 #define PHYS_SECTION_ROM 2
154 #define PHYS_SECTION_WATCH 3
156 static void io_mem_init(void);
157 static void memory_map_init(void);
158 static void tcg_commit(MemoryListener
*listener
);
160 static MemoryRegion io_mem_watch
;
163 #if !defined(CONFIG_USER_ONLY)
/*
 * Make sure map->nodes can hold `nodes` more entries on top of the
 * map->nodes_nb already in use.
 *
 * When the current allocation is too small, the allocated count is first
 * doubled (with a floor of 16), then raised further if that is still less
 * than nodes_nb + nodes, and the array is resized to that count with
 * g_renew().  Nothing happens when there is already enough room.
 */
165 static void phys_map_node_reserve(PhysPageMap
*map
, unsigned nodes
)
167 if (map
->nodes_nb
+ nodes
> map
->nodes_nb_alloc
) {
/* Geometric growth: at least double, never below 16 entries. */
168 map
->nodes_nb_alloc
= MAX(map
->nodes_nb_alloc
* 2, 16);
/* Doubling may still be short of the request; take the larger value. */
169 map
->nodes_nb_alloc
= MAX(map
->nodes_nb_alloc
, map
->nodes_nb
+ nodes
);
170 map
->nodes
= g_renew(Node
, map
->nodes
, map
->nodes_nb_alloc
);
174 static uint32_t phys_map_node_alloc(PhysPageMap
*map
, bool leaf
)
181 ret
= map
->nodes_nb
++;
183 assert(ret
!= PHYS_MAP_NODE_NIL
);
184 assert(ret
!= map
->nodes_nb_alloc
);
186 e
.skip
= leaf
? 0 : 1;
187 e
.ptr
= leaf
? PHYS_SECTION_UNASSIGNED
: PHYS_MAP_NODE_NIL
;
188 for (i
= 0; i
< P_L2_SIZE
; ++i
) {
189 memcpy(&p
[i
], &e
, sizeof(e
));
194 static void phys_page_set_level(PhysPageMap
*map
, PhysPageEntry
*lp
,
195 hwaddr
*index
, hwaddr
*nb
, uint16_t leaf
,
199 hwaddr step
= (hwaddr
)1 << (level
* P_L2_BITS
);
201 if (lp
->skip
&& lp
->ptr
== PHYS_MAP_NODE_NIL
) {
202 lp
->ptr
= phys_map_node_alloc(map
, level
== 0);
204 p
= map
->nodes
[lp
->ptr
];
205 lp
= &p
[(*index
>> (level
* P_L2_BITS
)) & (P_L2_SIZE
- 1)];
207 while (*nb
&& lp
< &p
[P_L2_SIZE
]) {
208 if ((*index
& (step
- 1)) == 0 && *nb
>= step
) {
214 phys_page_set_level(map
, lp
, index
, nb
, leaf
, level
- 1);
220 static void phys_page_set(AddressSpaceDispatch
*d
,
221 hwaddr index
, hwaddr nb
,
224 /* Wildly overreserve - it doesn't matter much. */
225 phys_map_node_reserve(&d
->map
, 3 * P_L2_LEVELS
);
227 phys_page_set_level(&d
->map
, &d
->phys_map
, &index
, &nb
, leaf
, P_L2_LEVELS
- 1);
230 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
231 * and update our entry so we can skip it and go directly to the destination.
233 static void phys_page_compact(PhysPageEntry
*lp
, Node
*nodes
, unsigned long *compacted
)
235 unsigned valid_ptr
= P_L2_SIZE
;
240 if (lp
->ptr
== PHYS_MAP_NODE_NIL
) {
245 for (i
= 0; i
< P_L2_SIZE
; i
++) {
246 if (p
[i
].ptr
== PHYS_MAP_NODE_NIL
) {
253 phys_page_compact(&p
[i
], nodes
, compacted
);
257 /* We can only compress if there's only one child. */
262 assert(valid_ptr
< P_L2_SIZE
);
264 /* Don't compress if it won't fit in the # of bits we have. */
265 if (lp
->skip
+ p
[valid_ptr
].skip
>= (1 << 3)) {
269 lp
->ptr
= p
[valid_ptr
].ptr
;
270 if (!p
[valid_ptr
].skip
) {
271 /* If our only child is a leaf, make this a leaf. */
272 /* By design, we should have made this node a leaf to begin with so we
273 * should never reach here.
274 * But since it's so simple to handle this, let's do it just in case we
279 lp
->skip
+= p
[valid_ptr
].skip
;
283 static void phys_page_compact_all(AddressSpaceDispatch
*d
, int nodes_nb
)
285 DECLARE_BITMAP(compacted
, nodes_nb
);
287 if (d
->phys_map
.skip
) {
288 phys_page_compact(&d
->phys_map
, d
->map
.nodes
, compacted
);
292 static MemoryRegionSection
*phys_page_find(PhysPageEntry lp
, hwaddr addr
,
293 Node
*nodes
, MemoryRegionSection
*sections
)
296 hwaddr index
= addr
>> TARGET_PAGE_BITS
;
299 for (i
= P_L2_LEVELS
; lp
.skip
&& (i
-= lp
.skip
) >= 0;) {
300 if (lp
.ptr
== PHYS_MAP_NODE_NIL
) {
301 return §ions
[PHYS_SECTION_UNASSIGNED
];
304 lp
= p
[(index
>> (i
* P_L2_BITS
)) & (P_L2_SIZE
- 1)];
307 if (sections
[lp
.ptr
].size
.hi
||
308 range_covers_byte(sections
[lp
.ptr
].offset_within_address_space
,
309 sections
[lp
.ptr
].size
.lo
, addr
)) {
310 return §ions
[lp
.ptr
];
312 return §ions
[PHYS_SECTION_UNASSIGNED
];
/*
 * Return true when @mr is none of the special regions io_mem_rom,
 * io_mem_notdirty and io_mem_watch, and its rom_device flag is clear.
 */
316 bool memory_region_is_unassigned(MemoryRegion
*mr
)
318 return mr
!= &io_mem_rom
&& mr
!= &io_mem_notdirty
&& !mr
->rom_device
319 && mr
!= &io_mem_watch
;
322 /* Called from RCU critical section */
323 static MemoryRegionSection
*address_space_lookup_region(AddressSpaceDispatch
*d
,
325 bool resolve_subpage
)
327 MemoryRegionSection
*section
;
330 section
= phys_page_find(d
->phys_map
, addr
, d
->map
.nodes
, d
->map
.sections
);
331 if (resolve_subpage
&& section
->mr
->subpage
) {
332 subpage
= container_of(section
->mr
, subpage_t
, iomem
);
333 section
= &d
->map
.sections
[subpage
->sub_section
[SUBPAGE_IDX(addr
)]];
338 /* Called from RCU critical section */
339 static MemoryRegionSection
*
340 address_space_translate_internal(AddressSpaceDispatch
*d
, hwaddr addr
, hwaddr
*xlat
,
341 hwaddr
*plen
, bool resolve_subpage
)
343 MemoryRegionSection
*section
;
346 section
= address_space_lookup_region(d
, addr
, resolve_subpage
);
347 /* Compute offset within MemoryRegionSection */
348 addr
-= section
->offset_within_address_space
;
350 /* Compute offset within MemoryRegion */
351 *xlat
= addr
+ section
->offset_within_region
;
353 diff
= int128_sub(section
->mr
->size
, int128_make64(addr
));
354 *plen
= int128_get64(int128_min(diff
, int128_make64(*plen
)));
358 static inline bool memory_access_is_direct(MemoryRegion
*mr
, bool is_write
)
360 if (memory_region_is_ram(mr
)) {
361 return !(is_write
&& mr
->readonly
);
363 if (memory_region_is_romd(mr
)) {
370 /* Called from RCU critical section */
371 MemoryRegion
*address_space_translate(AddressSpace
*as
, hwaddr addr
,
372 hwaddr
*xlat
, hwaddr
*plen
,
376 MemoryRegionSection
*section
;
380 AddressSpaceDispatch
*d
= atomic_rcu_read(&as
->dispatch
);
381 section
= address_space_translate_internal(d
, addr
, &addr
, plen
, true);
384 if (!mr
->iommu_ops
) {
388 iotlb
= mr
->iommu_ops
->translate(mr
, addr
, is_write
);
389 addr
= ((iotlb
.translated_addr
& ~iotlb
.addr_mask
)
390 | (addr
& iotlb
.addr_mask
));
391 *plen
= MIN(*plen
, (addr
| iotlb
.addr_mask
) - addr
+ 1);
392 if (!(iotlb
.perm
& (1 << is_write
))) {
393 mr
= &io_mem_unassigned
;
397 as
= iotlb
.target_as
;
400 if (xen_enabled() && memory_access_is_direct(mr
, is_write
)) {
401 hwaddr page
= ((addr
& TARGET_PAGE_MASK
) + TARGET_PAGE_SIZE
) - addr
;
402 *plen
= MIN(page
, *plen
);
409 /* Called from RCU critical section */
410 MemoryRegionSection
*
411 address_space_translate_for_iotlb(CPUState
*cpu
, hwaddr addr
,
412 hwaddr
*xlat
, hwaddr
*plen
)
414 MemoryRegionSection
*section
;
415 section
= address_space_translate_internal(cpu
->memory_dispatch
,
416 addr
, xlat
, plen
, false);
418 assert(!section
->mr
->iommu_ops
);
423 #if !defined(CONFIG_USER_ONLY)
425 static int cpu_common_post_load(void *opaque
, int version_id
)
427 CPUState
*cpu
= opaque
;
429 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
430 version_id is increased. */
431 cpu
->interrupt_request
&= ~0x01;
/*
 * pre_load hook of vmstate_cpu_common.
 *
 * @opaque is the CPUState being loaded.  exception_index is reset to -1,
 * which is the same value cpu_common_exception_index_needed() treats as
 * "no subsection required".
 */
437 static int cpu_common_pre_load(void *opaque
)
439 CPUState
*cpu
= opaque
;
441 cpu
->exception_index
= -1;
/*
 * "needed" predicate for the cpu_common/exception_index subsection.
 *
 * @opaque is the CPUState.  Returns true only when TCG is enabled and
 * exception_index holds something other than -1.
 */
446 static bool cpu_common_exception_index_needed(void *opaque
)
448 CPUState
*cpu
= opaque
;
450 return tcg_enabled() && cpu
->exception_index
!= -1;
453 static const VMStateDescription vmstate_cpu_common_exception_index
= {
454 .name
= "cpu_common/exception_index",
456 .minimum_version_id
= 1,
457 .fields
= (VMStateField
[]) {
458 VMSTATE_INT32(exception_index
, CPUState
),
459 VMSTATE_END_OF_LIST()
463 const VMStateDescription vmstate_cpu_common
= {
464 .name
= "cpu_common",
466 .minimum_version_id
= 1,
467 .pre_load
= cpu_common_pre_load
,
468 .post_load
= cpu_common_post_load
,
469 .fields
= (VMStateField
[]) {
470 VMSTATE_UINT32(halted
, CPUState
),
471 VMSTATE_UINT32(interrupt_request
, CPUState
),
472 VMSTATE_END_OF_LIST()
474 .subsections
= (VMStateSubsection
[]) {
476 .vmsd
= &vmstate_cpu_common_exception_index
,
477 .needed
= cpu_common_exception_index_needed
,
486 CPUState
*qemu_get_cpu(int index
)
491 if (cpu
->cpu_index
== index
) {
499 #if !defined(CONFIG_USER_ONLY)
500 void tcg_cpu_address_space_init(CPUState
*cpu
, AddressSpace
*as
)
502 /* We only support one address space per cpu at the moment. */
503 assert(cpu
->as
== as
);
505 if (cpu
->tcg_as_listener
) {
506 memory_listener_unregister(cpu
->tcg_as_listener
);
508 cpu
->tcg_as_listener
= g_new0(MemoryListener
, 1);
510 cpu
->tcg_as_listener
->commit
= tcg_commit
;
511 memory_listener_register(cpu
->tcg_as_listener
, as
);
515 void cpu_exec_init(CPUArchState
*env
)
517 CPUState
*cpu
= ENV_GET_CPU(env
);
518 CPUClass
*cc
= CPU_GET_CLASS(cpu
);
522 #if defined(CONFIG_USER_ONLY)
526 CPU_FOREACH(some_cpu
) {
529 cpu
->cpu_index
= cpu_index
;
531 QTAILQ_INIT(&cpu
->breakpoints
);
532 QTAILQ_INIT(&cpu
->watchpoints
);
533 #ifndef CONFIG_USER_ONLY
534 cpu
->as
= &address_space_memory
;
535 cpu
->thread_id
= qemu_get_thread_id();
536 cpu_reload_memory_map(cpu
);
538 QTAILQ_INSERT_TAIL(&cpus
, cpu
, node
);
539 #if defined(CONFIG_USER_ONLY)
542 if (qdev_get_vmsd(DEVICE(cpu
)) == NULL
) {
543 vmstate_register(NULL
, cpu_index
, &vmstate_cpu_common
, cpu
);
545 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
546 register_savevm(NULL
, "cpu", cpu_index
, CPU_SAVE_VERSION
,
547 cpu_save
, cpu_load
, env
);
548 assert(cc
->vmsd
== NULL
);
549 assert(qdev_get_vmsd(DEVICE(cpu
)) == NULL
);
551 if (cc
->vmsd
!= NULL
) {
552 vmstate_register(NULL
, cpu_index
, cc
->vmsd
, cpu
);
556 #if defined(CONFIG_USER_ONLY)
557 static void breakpoint_invalidate(CPUState
*cpu
, target_ulong pc
)
559 tb_invalidate_phys_page_range(pc
, pc
+ 1, 0);
562 static void breakpoint_invalidate(CPUState
*cpu
, target_ulong pc
)
564 hwaddr phys
= cpu_get_phys_page_debug(cpu
, pc
);
566 tb_invalidate_phys_addr(cpu
->as
,
567 phys
| (pc
& ~TARGET_PAGE_MASK
));
572 #if defined(CONFIG_USER_ONLY)
573 void cpu_watchpoint_remove_all(CPUState
*cpu
, int mask
)
578 int cpu_watchpoint_remove(CPUState
*cpu
, vaddr addr
, vaddr len
,
584 void cpu_watchpoint_remove_by_ref(CPUState
*cpu
, CPUWatchpoint
*watchpoint
)
588 int cpu_watchpoint_insert(CPUState
*cpu
, vaddr addr
, vaddr len
,
589 int flags
, CPUWatchpoint
**watchpoint
)
594 /* Add a watchpoint. */
595 int cpu_watchpoint_insert(CPUState
*cpu
, vaddr addr
, vaddr len
,
596 int flags
, CPUWatchpoint
**watchpoint
)
600 /* forbid ranges which are empty or run off the end of the address space */
601 if (len
== 0 || (addr
+ len
- 1) < addr
) {
602 error_report("tried to set invalid watchpoint at %"
603 VADDR_PRIx
", len=%" VADDR_PRIu
, addr
, len
);
606 wp
= g_malloc(sizeof(*wp
));
612 /* keep all GDB-injected watchpoints in front */
613 if (flags
& BP_GDB
) {
614 QTAILQ_INSERT_HEAD(&cpu
->watchpoints
, wp
, entry
);
616 QTAILQ_INSERT_TAIL(&cpu
->watchpoints
, wp
, entry
);
619 tlb_flush_page(cpu
, addr
);
626 /* Remove a specific watchpoint. */
627 int cpu_watchpoint_remove(CPUState
*cpu
, vaddr addr
, vaddr len
,
632 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
633 if (addr
== wp
->vaddr
&& len
== wp
->len
634 && flags
== (wp
->flags
& ~BP_WATCHPOINT_HIT
)) {
635 cpu_watchpoint_remove_by_ref(cpu
, wp
);
642 /* Remove a specific watchpoint by reference. */
643 void cpu_watchpoint_remove_by_ref(CPUState
*cpu
, CPUWatchpoint
*watchpoint
)
645 QTAILQ_REMOVE(&cpu
->watchpoints
, watchpoint
, entry
);
647 tlb_flush_page(cpu
, watchpoint
->vaddr
);
652 /* Remove all matching watchpoints. */
/*
 * Remove every watchpoint of @cpu whose flags share at least one bit with
 * @mask.
 */
653 void cpu_watchpoint_remove_all(CPUState
*cpu
, int mask
)
655 CPUWatchpoint
*wp
, *next
;
/*
 * The _SAFE iterator keeps the successor in `next`, because
 * cpu_watchpoint_remove_by_ref() unlinks wp from the list being walked.
 */
657 QTAILQ_FOREACH_SAFE(wp
, &cpu
->watchpoints
, entry
, next
) {
658 if (wp
->flags
& mask
) {
659 cpu_watchpoint_remove_by_ref(cpu
, wp
);
664 /* Return true if this watchpoint address matches the specified
665 * access (ie the address range covered by the watchpoint overlaps
666 * partially or completely with the address range covered by the
669 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint
*wp
,
673 /* We know the lengths are non-zero, but a little caution is
674 * required to avoid errors in the case where the range ends
675 * exactly at the top of the address space and so addr + len
676 * wraps round to zero.
678 vaddr wpend
= wp
->vaddr
+ wp
->len
- 1;
679 vaddr addrend
= addr
+ len
- 1;
681 return !(addr
> wpend
|| wp
->vaddr
> addrend
);
686 /* Add a breakpoint. */
687 int cpu_breakpoint_insert(CPUState
*cpu
, vaddr pc
, int flags
,
688 CPUBreakpoint
**breakpoint
)
692 bp
= g_malloc(sizeof(*bp
));
697 /* keep all GDB-injected breakpoints in front */
698 if (flags
& BP_GDB
) {
699 QTAILQ_INSERT_HEAD(&cpu
->breakpoints
, bp
, entry
);
701 QTAILQ_INSERT_TAIL(&cpu
->breakpoints
, bp
, entry
);
704 breakpoint_invalidate(cpu
, pc
);
712 /* Remove a specific breakpoint. */
713 int cpu_breakpoint_remove(CPUState
*cpu
, vaddr pc
, int flags
)
717 QTAILQ_FOREACH(bp
, &cpu
->breakpoints
, entry
) {
718 if (bp
->pc
== pc
&& bp
->flags
== flags
) {
719 cpu_breakpoint_remove_by_ref(cpu
, bp
);
726 /* Remove a specific breakpoint by reference. */
727 void cpu_breakpoint_remove_by_ref(CPUState
*cpu
, CPUBreakpoint
*breakpoint
)
729 QTAILQ_REMOVE(&cpu
->breakpoints
, breakpoint
, entry
);
731 breakpoint_invalidate(cpu
, breakpoint
->pc
);
736 /* Remove all matching breakpoints. */
/*
 * Remove every breakpoint of @cpu whose flags share at least one bit with
 * @mask.
 */
737 void cpu_breakpoint_remove_all(CPUState
*cpu
, int mask
)
739 CPUBreakpoint
*bp
, *next
;
/*
 * The _SAFE iterator keeps the successor in `next`, because
 * cpu_breakpoint_remove_by_ref() unlinks bp from the list being walked.
 */
741 QTAILQ_FOREACH_SAFE(bp
, &cpu
->breakpoints
, entry
, next
) {
742 if (bp
->flags
& mask
) {
743 cpu_breakpoint_remove_by_ref(cpu
, bp
);
748 /* enable or disable single step mode. EXCP_DEBUG is returned by the
749 CPU loop after each instruction */
750 void cpu_single_step(CPUState
*cpu
, int enabled
)
752 if (cpu
->singlestep_enabled
!= enabled
) {
753 cpu
->singlestep_enabled
= enabled
;
755 kvm_update_guest_debug(cpu
, 0);
757 /* must flush all the translated code to avoid inconsistencies */
758 /* XXX: only flush what is necessary */
759 CPUArchState
*env
= cpu
->env_ptr
;
765 void cpu_abort(CPUState
*cpu
, const char *fmt
, ...)
772 fprintf(stderr
, "qemu: fatal: ");
773 vfprintf(stderr
, fmt
, ap
);
774 fprintf(stderr
, "\n");
775 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
| CPU_DUMP_CCOP
);
776 if (qemu_log_enabled()) {
777 qemu_log("qemu: fatal: ");
778 qemu_log_vprintf(fmt
, ap2
);
780 log_cpu_state(cpu
, CPU_DUMP_FPU
| CPU_DUMP_CCOP
);
786 #if defined(CONFIG_USER_ONLY)
788 struct sigaction act
;
789 sigfillset(&act
.sa_mask
);
790 act
.sa_handler
= SIG_DFL
;
791 sigaction(SIGABRT
, &act
, NULL
);
797 #if !defined(CONFIG_USER_ONLY)
798 /* Called from RCU critical section */
799 static RAMBlock
*qemu_get_ram_block(ram_addr_t addr
)
803 block
= atomic_rcu_read(&ram_list
.mru_block
);
804 if (block
&& addr
- block
->offset
< block
->max_length
) {
807 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
808 if (addr
- block
->offset
< block
->max_length
) {
813 fprintf(stderr
, "Bad ram offset %" PRIx64
"\n", (uint64_t)addr
);
817 /* It is safe to write mru_block outside the iothread lock. This
822 * xxx removed from list
826 * call_rcu(reclaim_ramblock, xxx);
829 * atomic_rcu_set is not needed here. The block was already published
830 * when it was placed into the list. Here we're just making an extra
831 * copy of the pointer.
833 ram_list
.mru_block
= block
;
837 static void tlb_reset_dirty_range_all(ram_addr_t start
, ram_addr_t length
)
843 end
= TARGET_PAGE_ALIGN(start
+ length
);
844 start
&= TARGET_PAGE_MASK
;
847 block
= qemu_get_ram_block(start
);
848 assert(block
== qemu_get_ram_block(end
- 1));
849 start1
= (uintptr_t)ramblock_ptr(block
, start
- block
->offset
);
850 cpu_tlb_reset_dirty_all(start1
, length
);
854 /* Note: start and end must be within the same ram block. */
855 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start
,
859 unsigned long end
, page
;
866 end
= TARGET_PAGE_ALIGN(start
+ length
) >> TARGET_PAGE_BITS
;
867 page
= start
>> TARGET_PAGE_BITS
;
868 dirty
= bitmap_test_and_clear_atomic(ram_list
.dirty_memory
[client
],
871 if (dirty
&& tcg_enabled()) {
872 tlb_reset_dirty_range_all(start
, length
);
878 /* Called from RCU critical section */
879 hwaddr
memory_region_section_get_iotlb(CPUState
*cpu
,
880 MemoryRegionSection
*section
,
882 hwaddr paddr
, hwaddr xlat
,
884 target_ulong
*address
)
889 if (memory_region_is_ram(section
->mr
)) {
891 iotlb
= (memory_region_get_ram_addr(section
->mr
) & TARGET_PAGE_MASK
)
893 if (!section
->readonly
) {
894 iotlb
|= PHYS_SECTION_NOTDIRTY
;
896 iotlb
|= PHYS_SECTION_ROM
;
899 iotlb
= section
- section
->address_space
->dispatch
->map
.sections
;
903 /* Make accesses to pages with watchpoints go via the
904 watchpoint trap routines. */
905 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
906 if (cpu_watchpoint_address_matches(wp
, vaddr
, TARGET_PAGE_SIZE
)) {
907 /* Avoid trapping reads of pages with a write breakpoint. */
908 if ((prot
& PAGE_WRITE
) || (wp
->flags
& BP_MEM_READ
)) {
909 iotlb
= PHYS_SECTION_WATCH
+ paddr
;
910 *address
|= TLB_MMIO
;
918 #endif /* !defined(CONFIG_USER_ONLY) */
920 #if !defined(CONFIG_USER_ONLY)
922 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
924 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
);
926 static void *(*phys_mem_alloc
)(size_t size
, uint64_t *align
) =
930 * Set a custom physical guest memory allocator.
931 * Accelerators with unusual needs may need this. Hopefully, we can
932 * get rid of it eventually.
/*
 * Store @alloc in the file-scope phys_mem_alloc function pointer.
 *
 * ram_block_add() calls through that pointer, passing the block's
 * max_length and the address of its region's align field, to obtain host
 * memory for a block that does not have any yet.
 */
934 void phys_mem_set_alloc(void *(*alloc
)(size_t, uint64_t *align
))
936 phys_mem_alloc
= alloc
;
939 static uint16_t phys_section_add(PhysPageMap
*map
,
940 MemoryRegionSection
*section
)
942 /* The physical section number is ORed with a page-aligned
943 * pointer to produce the iotlb entries. Thus it should
944 * never overflow into the page-aligned value.
946 assert(map
->sections_nb
< TARGET_PAGE_SIZE
);
948 if (map
->sections_nb
== map
->sections_nb_alloc
) {
949 map
->sections_nb_alloc
= MAX(map
->sections_nb_alloc
* 2, 16);
950 map
->sections
= g_renew(MemoryRegionSection
, map
->sections
,
951 map
->sections_nb_alloc
);
953 map
->sections
[map
->sections_nb
] = *section
;
954 memory_region_ref(section
->mr
);
955 return map
->sections_nb
++;
958 static void phys_section_destroy(MemoryRegion
*mr
)
960 memory_region_unref(mr
);
963 subpage_t
*subpage
= container_of(mr
, subpage_t
, iomem
);
964 object_unref(OBJECT(&subpage
->iomem
));
969 static void phys_sections_free(PhysPageMap
*map
)
971 while (map
->sections_nb
> 0) {
972 MemoryRegionSection
*section
= &map
->sections
[--map
->sections_nb
];
973 phys_section_destroy(section
->mr
);
975 g_free(map
->sections
);
979 static void register_subpage(AddressSpaceDispatch
*d
, MemoryRegionSection
*section
)
982 hwaddr base
= section
->offset_within_address_space
984 MemoryRegionSection
*existing
= phys_page_find(d
->phys_map
, base
,
985 d
->map
.nodes
, d
->map
.sections
);
986 MemoryRegionSection subsection
= {
987 .offset_within_address_space
= base
,
988 .size
= int128_make64(TARGET_PAGE_SIZE
),
992 assert(existing
->mr
->subpage
|| existing
->mr
== &io_mem_unassigned
);
994 if (!(existing
->mr
->subpage
)) {
995 subpage
= subpage_init(d
->as
, base
);
996 subsection
.address_space
= d
->as
;
997 subsection
.mr
= &subpage
->iomem
;
998 phys_page_set(d
, base
>> TARGET_PAGE_BITS
, 1,
999 phys_section_add(&d
->map
, &subsection
));
1001 subpage
= container_of(existing
->mr
, subpage_t
, iomem
);
1003 start
= section
->offset_within_address_space
& ~TARGET_PAGE_MASK
;
1004 end
= start
+ int128_get64(section
->size
) - 1;
1005 subpage_register(subpage
, start
, end
,
1006 phys_section_add(&d
->map
, section
));
1010 static void register_multipage(AddressSpaceDispatch
*d
,
1011 MemoryRegionSection
*section
)
1013 hwaddr start_addr
= section
->offset_within_address_space
;
1014 uint16_t section_index
= phys_section_add(&d
->map
, section
);
1015 uint64_t num_pages
= int128_get64(int128_rshift(section
->size
,
1019 phys_page_set(d
, start_addr
>> TARGET_PAGE_BITS
, num_pages
, section_index
);
1022 static void mem_add(MemoryListener
*listener
, MemoryRegionSection
*section
)
1024 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
1025 AddressSpaceDispatch
*d
= as
->next_dispatch
;
1026 MemoryRegionSection now
= *section
, remain
= *section
;
1027 Int128 page_size
= int128_make64(TARGET_PAGE_SIZE
);
1029 if (now
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1030 uint64_t left
= TARGET_PAGE_ALIGN(now
.offset_within_address_space
)
1031 - now
.offset_within_address_space
;
1033 now
.size
= int128_min(int128_make64(left
), now
.size
);
1034 register_subpage(d
, &now
);
1036 now
.size
= int128_zero();
1038 while (int128_ne(remain
.size
, now
.size
)) {
1039 remain
.size
= int128_sub(remain
.size
, now
.size
);
1040 remain
.offset_within_address_space
+= int128_get64(now
.size
);
1041 remain
.offset_within_region
+= int128_get64(now
.size
);
1043 if (int128_lt(remain
.size
, page_size
)) {
1044 register_subpage(d
, &now
);
1045 } else if (remain
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1046 now
.size
= page_size
;
1047 register_subpage(d
, &now
);
1049 now
.size
= int128_and(now
.size
, int128_neg(page_size
));
1050 register_multipage(d
, &now
);
1055 void qemu_flush_coalesced_mmio_buffer(void)
1058 kvm_flush_coalesced_mmio_buffer();
/* Acquire ram_list.mutex, the lock that protects writes to ram_list. */
1061 void qemu_mutex_lock_ramlist(void)
1063 qemu_mutex_lock(&ram_list
.mutex
);
/* Release ram_list.mutex, the lock that protects writes to ram_list. */
1066 void qemu_mutex_unlock_ramlist(void)
1068 qemu_mutex_unlock(&ram_list
.mutex
);
1073 #include <sys/vfs.h>
1075 #define HUGETLBFS_MAGIC 0x958458f6
1077 static long gethugepagesize(const char *path
, Error
**errp
)
1083 ret
= statfs(path
, &fs
);
1084 } while (ret
!= 0 && errno
== EINTR
);
1087 error_setg_errno(errp
, errno
, "failed to get page size of file %s",
1092 if (fs
.f_type
!= HUGETLBFS_MAGIC
)
1093 fprintf(stderr
, "Warning: path not on HugeTLBFS: %s\n", path
);
1098 static void *file_ram_alloc(RAMBlock
*block
,
1104 char *sanitized_name
;
1109 Error
*local_err
= NULL
;
1111 hpagesize
= gethugepagesize(path
, &local_err
);
1113 error_propagate(errp
, local_err
);
1116 block
->mr
->align
= hpagesize
;
1118 if (memory
< hpagesize
) {
1119 error_setg(errp
, "memory size 0x" RAM_ADDR_FMT
" must be equal to "
1120 "or larger than huge page size 0x%" PRIx64
,
1125 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1127 "host lacks kvm mmu notifiers, -mem-path unsupported");
1131 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1132 sanitized_name
= g_strdup(memory_region_name(block
->mr
));
1133 for (c
= sanitized_name
; *c
!= '\0'; c
++) {
1138 filename
= g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path
,
1140 g_free(sanitized_name
);
1142 fd
= mkstemp(filename
);
1144 error_setg_errno(errp
, errno
,
1145 "unable to create backing store for hugepages");
1152 memory
= (memory
+hpagesize
-1) & ~(hpagesize
-1);
1155 * ftruncate is not supported by hugetlbfs in older
1156 * hosts, so don't bother bailing out on errors.
1157 * If anything goes wrong with it under other filesystems,
1160 if (ftruncate(fd
, memory
)) {
1161 perror("ftruncate");
1164 area
= mmap(0, memory
, PROT_READ
| PROT_WRITE
,
1165 (block
->flags
& RAM_SHARED
? MAP_SHARED
: MAP_PRIVATE
),
1167 if (area
== MAP_FAILED
) {
1168 error_setg_errno(errp
, errno
,
1169 "unable to map backing store for hugepages");
1175 os_mem_prealloc(fd
, area
, memory
);
1183 error_report("%s", error_get_pretty(*errp
));
1190 /* Called with the ramlist lock held. */
1191 static ram_addr_t
find_ram_offset(ram_addr_t size
)
1193 RAMBlock
*block
, *next_block
;
1194 ram_addr_t offset
= RAM_ADDR_MAX
, mingap
= RAM_ADDR_MAX
;
1196 assert(size
!= 0); /* it would hand out same offset multiple times */
1198 if (QLIST_EMPTY_RCU(&ram_list
.blocks
)) {
1202 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1203 ram_addr_t end
, next
= RAM_ADDR_MAX
;
1205 end
= block
->offset
+ block
->max_length
;
1207 QLIST_FOREACH_RCU(next_block
, &ram_list
.blocks
, next
) {
1208 if (next_block
->offset
>= end
) {
1209 next
= MIN(next
, next_block
->offset
);
1212 if (next
- end
>= size
&& next
- end
< mingap
) {
1214 mingap
= next
- end
;
1218 if (offset
== RAM_ADDR_MAX
) {
1219 fprintf(stderr
, "Failed to find gap of requested size: %" PRIu64
"\n",
/*
 * Walk ram_list.blocks and track in `last` the largest value of
 * block->offset + block->max_length, i.e. the end of the highest block.
 * `last` starts at 0, so an empty list leaves it at 0.
 *
 * ram_block_add() shifts the result right by TARGET_PAGE_BITS to get a
 * page count for sizing the dirty bitmaps.
 * NOTE(review): presumably `last` is what gets returned - confirm.
 */
1227 ram_addr_t
last_ram_offset(void)
1230 ram_addr_t last
= 0;
1233 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1234 last
= MAX(last
, block
->offset
+ block
->max_length
);
1240 static void qemu_ram_setup_dump(void *addr
, ram_addr_t size
)
1244 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1245 if (!machine_dump_guest_core(current_machine
)) {
1246 ret
= qemu_madvise(addr
, size
, QEMU_MADV_DONTDUMP
);
1248 perror("qemu_madvise");
1249 fprintf(stderr
, "madvise doesn't support MADV_DONTDUMP, "
1250 "but dump_guest_core=off specified\n");
1255 /* Called within an RCU critical section, or while the ramlist lock
1258 static RAMBlock
*find_ram_block(ram_addr_t addr
)
1262 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1263 if (block
->offset
== addr
) {
1271 /* Called with iothread lock held. */
1272 void qemu_ram_set_idstr(ram_addr_t addr
, const char *name
, DeviceState
*dev
)
1274 RAMBlock
*new_block
, *block
;
1277 new_block
= find_ram_block(addr
);
1279 assert(!new_block
->idstr
[0]);
1282 char *id
= qdev_get_dev_path(dev
);
1284 snprintf(new_block
->idstr
, sizeof(new_block
->idstr
), "%s/", id
);
1288 pstrcat(new_block
->idstr
, sizeof(new_block
->idstr
), name
);
1290 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1291 if (block
!= new_block
&& !strcmp(block
->idstr
, new_block
->idstr
)) {
1292 fprintf(stderr
, "RAMBlock \"%s\" already registered, abort!\n",
1300 /* Called with iothread lock held. */
1301 void qemu_ram_unset_idstr(ram_addr_t addr
)
1305 /* FIXME: arch_init.c assumes that this is not called throughout
1306 * migration. Ignore the problem since hot-unplug during migration
1307 * does not work anyway.
1311 block
= find_ram_block(addr
);
1313 memset(block
->idstr
, 0, sizeof(block
->idstr
));
1318 static int memory_try_enable_merging(void *addr
, size_t len
)
1320 if (!machine_mem_merge(current_machine
)) {
1321 /* disabled by the user */
1325 return qemu_madvise(addr
, len
, QEMU_MADV_MERGEABLE
);
1328 /* Only legal before guest might have detected the memory size: e.g. on
1329 * incoming migration, or right after reset.
1331 * As memory core doesn't know how is memory accessed, it is up to
1332 * resize callback to update device state and/or add assertions to detect
1333 * misuse, if necessary.
1335 int qemu_ram_resize(ram_addr_t base
, ram_addr_t newsize
, Error
**errp
)
1337 RAMBlock
*block
= find_ram_block(base
);
1341 newsize
= TARGET_PAGE_ALIGN(newsize
);
1343 if (block
->used_length
== newsize
) {
1347 if (!(block
->flags
& RAM_RESIZEABLE
)) {
1348 error_setg_errno(errp
, EINVAL
,
1349 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1350 " in != 0x" RAM_ADDR_FMT
, block
->idstr
,
1351 newsize
, block
->used_length
);
1355 if (block
->max_length
< newsize
) {
1356 error_setg_errno(errp
, EINVAL
,
1357 "Length too large: %s: 0x" RAM_ADDR_FMT
1358 " > 0x" RAM_ADDR_FMT
, block
->idstr
,
1359 newsize
, block
->max_length
);
1363 cpu_physical_memory_clear_dirty_range(block
->offset
, block
->used_length
);
1364 block
->used_length
= newsize
;
1365 cpu_physical_memory_set_dirty_range(block
->offset
, block
->used_length
,
1367 memory_region_set_size(block
->mr
, newsize
);
1368 if (block
->resized
) {
1369 block
->resized(block
->idstr
, newsize
, block
->host
);
1374 static ram_addr_t
ram_block_add(RAMBlock
*new_block
, Error
**errp
)
1377 RAMBlock
*last_block
= NULL
;
1378 ram_addr_t old_ram_size
, new_ram_size
;
1380 old_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1382 qemu_mutex_lock_ramlist();
1383 new_block
->offset
= find_ram_offset(new_block
->max_length
);
1385 if (!new_block
->host
) {
1386 if (xen_enabled()) {
1387 xen_ram_alloc(new_block
->offset
, new_block
->max_length
,
1390 new_block
->host
= phys_mem_alloc(new_block
->max_length
,
1391 &new_block
->mr
->align
);
1392 if (!new_block
->host
) {
1393 error_setg_errno(errp
, errno
,
1394 "cannot set up guest memory '%s'",
1395 memory_region_name(new_block
->mr
));
1396 qemu_mutex_unlock_ramlist();
1399 memory_try_enable_merging(new_block
->host
, new_block
->max_length
);
1403 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1404 * QLIST (which has an RCU-friendly variant) does not have insertion at
1405 * tail, so save the last element in last_block.
1407 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1409 if (block
->max_length
< new_block
->max_length
) {
1414 QLIST_INSERT_BEFORE_RCU(block
, new_block
, next
);
1415 } else if (last_block
) {
1416 QLIST_INSERT_AFTER_RCU(last_block
, new_block
, next
);
1417 } else { /* list is empty */
1418 QLIST_INSERT_HEAD_RCU(&ram_list
.blocks
, new_block
, next
);
1420 ram_list
.mru_block
= NULL
;
1422 /* Write list before version */
1425 qemu_mutex_unlock_ramlist();
1427 new_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1429 if (new_ram_size
> old_ram_size
) {
1432 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1433 for (i
= 0; i
< DIRTY_MEMORY_NUM
; i
++) {
1434 ram_list
.dirty_memory
[i
] =
1435 bitmap_zero_extend(ram_list
.dirty_memory
[i
],
1436 old_ram_size
, new_ram_size
);
1439 cpu_physical_memory_set_dirty_range(new_block
->offset
,
1440 new_block
->used_length
,
1443 if (new_block
->host
) {
1444 qemu_ram_setup_dump(new_block
->host
, new_block
->max_length
);
1445 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_HUGEPAGE
);
1446 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_DONTFORK
);
1447 if (kvm_enabled()) {
1448 kvm_setup_guest_memory(new_block
->host
, new_block
->max_length
);
1452 return new_block
->offset
;
1456 ram_addr_t
qemu_ram_alloc_from_file(ram_addr_t size
, MemoryRegion
*mr
,
1457 bool share
, const char *mem_path
,
1460 RAMBlock
*new_block
;
1462 Error
*local_err
= NULL
;
1464 if (xen_enabled()) {
1465 error_setg(errp
, "-mem-path not supported with Xen");
1469 if (phys_mem_alloc
!= qemu_anon_ram_alloc
) {
1471 * file_ram_alloc() needs to allocate just like
1472 * phys_mem_alloc, but we haven't bothered to provide
1476 "-mem-path not supported with this accelerator");
1480 size
= TARGET_PAGE_ALIGN(size
);
1481 new_block
= g_malloc0(sizeof(*new_block
));
1483 new_block
->used_length
= size
;
1484 new_block
->max_length
= size
;
1485 new_block
->flags
= share
? RAM_SHARED
: 0;
1486 new_block
->host
= file_ram_alloc(new_block
, size
,
1488 if (!new_block
->host
) {
1493 addr
= ram_block_add(new_block
, &local_err
);
1496 error_propagate(errp
, local_err
);
1504 ram_addr_t
qemu_ram_alloc_internal(ram_addr_t size
, ram_addr_t max_size
,
1505 void (*resized
)(const char*,
1508 void *host
, bool resizeable
,
1509 MemoryRegion
*mr
, Error
**errp
)
1511 RAMBlock
*new_block
;
1513 Error
*local_err
= NULL
;
1515 size
= TARGET_PAGE_ALIGN(size
);
1516 max_size
= TARGET_PAGE_ALIGN(max_size
);
1517 new_block
= g_malloc0(sizeof(*new_block
));
1519 new_block
->resized
= resized
;
1520 new_block
->used_length
= size
;
1521 new_block
->max_length
= max_size
;
1522 assert(max_size
>= size
);
1524 new_block
->host
= host
;
1526 new_block
->flags
|= RAM_PREALLOC
;
1529 new_block
->flags
|= RAM_RESIZEABLE
;
1531 addr
= ram_block_add(new_block
, &local_err
);
1534 error_propagate(errp
, local_err
);
1540 ram_addr_t
qemu_ram_alloc_from_ptr(ram_addr_t size
, void *host
,
1541 MemoryRegion
*mr
, Error
**errp
)
1543 return qemu_ram_alloc_internal(size
, size
, NULL
, host
, false, mr
, errp
);
1546 ram_addr_t
qemu_ram_alloc(ram_addr_t size
, MemoryRegion
*mr
, Error
**errp
)
1548 return qemu_ram_alloc_internal(size
, size
, NULL
, NULL
, false, mr
, errp
);
1551 ram_addr_t
qemu_ram_alloc_resizeable(ram_addr_t size
, ram_addr_t maxsz
,
1552 void (*resized
)(const char*,
1555 MemoryRegion
*mr
, Error
**errp
)
1557 return qemu_ram_alloc_internal(size
, maxsz
, resized
, NULL
, true, mr
, errp
);
1560 void qemu_ram_free_from_ptr(ram_addr_t addr
)
1564 qemu_mutex_lock_ramlist();
1565 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1566 if (addr
== block
->offset
) {
1567 QLIST_REMOVE_RCU(block
, next
);
1568 ram_list
.mru_block
= NULL
;
1569 /* Write list before version */
1572 g_free_rcu(block
, rcu
);
1576 qemu_mutex_unlock_ramlist();
1579 static void reclaim_ramblock(RAMBlock
*block
)
1581 if (block
->flags
& RAM_PREALLOC
) {
1583 } else if (xen_enabled()) {
1584 xen_invalidate_map_cache_entry(block
->host
);
1586 } else if (block
->fd
>= 0) {
1587 munmap(block
->host
, block
->max_length
);
1591 qemu_anon_ram_free(block
->host
, block
->max_length
);
1596 void qemu_ram_free(ram_addr_t addr
)
1600 qemu_mutex_lock_ramlist();
1601 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1602 if (addr
== block
->offset
) {
1603 QLIST_REMOVE_RCU(block
, next
);
1604 ram_list
.mru_block
= NULL
;
1605 /* Write list before version */
1608 call_rcu(block
, reclaim_ramblock
, rcu
);
1612 qemu_mutex_unlock_ramlist();
1616 void qemu_ram_remap(ram_addr_t addr
, ram_addr_t length
)
1623 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1624 offset
= addr
- block
->offset
;
1625 if (offset
< block
->max_length
) {
1626 vaddr
= ramblock_ptr(block
, offset
);
1627 if (block
->flags
& RAM_PREALLOC
) {
1629 } else if (xen_enabled()) {
1633 if (block
->fd
>= 0) {
1634 flags
|= (block
->flags
& RAM_SHARED
?
1635 MAP_SHARED
: MAP_PRIVATE
);
1636 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1637 flags
, block
->fd
, offset
);
1640 * Remap needs to match alloc. Accelerators that
1641 * set phys_mem_alloc never remap. If they did,
1642 * we'd need a remap hook here.
1644 assert(phys_mem_alloc
== qemu_anon_ram_alloc
);
1646 flags
|= MAP_PRIVATE
| MAP_ANONYMOUS
;
1647 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1650 if (area
!= vaddr
) {
1651 fprintf(stderr
, "Could not remap addr: "
1652 RAM_ADDR_FMT
"@" RAM_ADDR_FMT
"\n",
1656 memory_try_enable_merging(vaddr
, length
);
1657 qemu_ram_setup_dump(vaddr
, length
);
1662 #endif /* !_WIN32 */
1664 int qemu_get_ram_fd(ram_addr_t addr
)
1670 block
= qemu_get_ram_block(addr
);
1676 void *qemu_get_ram_block_host_ptr(ram_addr_t addr
)
1682 block
= qemu_get_ram_block(addr
);
1683 ptr
= ramblock_ptr(block
, 0);
1688 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1689 * This should not be used for general purpose DMA. Use address_space_map
1690 * or address_space_rw instead. For local memory (e.g. video ram) that the
1691 * device owns, use memory_region_get_ram_ptr.
1693 * By the time this function returns, the returned pointer is not protected
1694 * by RCU anymore. If the caller is not within an RCU critical section and
1695 * does not hold the iothread lock, it must have other means of protecting the
1696 * pointer, such as a reference to the region that includes the incoming
1699 void *qemu_get_ram_ptr(ram_addr_t addr
)
1705 block
= qemu_get_ram_block(addr
);
1707 if (xen_enabled() && block
->host
== NULL
) {
1708 /* We need to check if the requested address is in the RAM
1709 * because we don't want to map the entire memory in QEMU.
1710 * In that case just map until the end of the page.
1712 if (block
->offset
== 0) {
1713 ptr
= xen_map_cache(addr
, 0, 0);
1717 block
->host
= xen_map_cache(block
->offset
, block
->max_length
, 1);
1719 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1726 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1727 * but takes a size argument.
1729 * By the time this function returns, the returned pointer is not protected
1730 * by RCU anymore. If the caller is not within an RCU critical section and
1731 * does not hold the iothread lock, it must have other means of protecting the
1732 * pointer, such as a reference to the region that includes the incoming
1735 static void *qemu_ram_ptr_length(ram_addr_t addr
, hwaddr
*size
)
1741 if (xen_enabled()) {
1742 return xen_map_cache(addr
, *size
, 1);
1746 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1747 if (addr
- block
->offset
< block
->max_length
) {
1748 if (addr
- block
->offset
+ *size
> block
->max_length
)
1749 *size
= block
->max_length
- addr
+ block
->offset
;
1750 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1756 fprintf(stderr
, "Bad ram offset %" PRIx64
"\n", (uint64_t)addr
);
1761 /* Some of the softmmu routines need to translate from a host pointer
1762 * (typically a TLB entry) back to a ram offset.
1764 * By the time this function returns, the returned pointer is not protected
1765 * by RCU anymore. If the caller is not within an RCU critical section and
1766 * does not hold the iothread lock, it must have other means of protecting the
1767 * pointer, such as a reference to the region that includes the incoming
1770 MemoryRegion
*qemu_ram_addr_from_host(void *ptr
, ram_addr_t
*ram_addr
)
1773 uint8_t *host
= ptr
;
1776 if (xen_enabled()) {
1778 *ram_addr
= xen_ram_addr_from_mapcache(ptr
);
1779 mr
= qemu_get_ram_block(*ram_addr
)->mr
;
1785 block
= atomic_rcu_read(&ram_list
.mru_block
);
1786 if (block
&& block
->host
&& host
- block
->host
< block
->max_length
) {
1790 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1791 /* This case happens when the block is not mapped. */
1792 if (block
->host
== NULL
) {
1795 if (host
- block
->host
< block
->max_length
) {
1804 *ram_addr
= block
->offset
+ (host
- block
->host
);
1810 static void notdirty_mem_write(void *opaque
, hwaddr ram_addr
,
1811 uint64_t val
, unsigned size
)
1813 if (!cpu_physical_memory_get_dirty_flag(ram_addr
, DIRTY_MEMORY_CODE
)) {
1814 tb_invalidate_phys_page_fast(ram_addr
, size
);
1818 stb_p(qemu_get_ram_ptr(ram_addr
), val
);
1821 stw_p(qemu_get_ram_ptr(ram_addr
), val
);
1824 stl_p(qemu_get_ram_ptr(ram_addr
), val
);
1829 /* Set both VGA and migration bits for simplicity and to remove
1830 * the notdirty callback faster.
1832 cpu_physical_memory_set_dirty_range(ram_addr
, size
,
1833 DIRTY_CLIENTS_NOCODE
);
1834 /* we remove the notdirty callback only if the code has been
1836 if (!cpu_physical_memory_is_clean(ram_addr
)) {
1837 CPUArchState
*env
= current_cpu
->env_ptr
;
1838 tlb_set_dirty(env
, current_cpu
->mem_io_vaddr
);
1842 static bool notdirty_mem_accepts(void *opaque
, hwaddr addr
,
1843 unsigned size
, bool is_write
)
1848 static const MemoryRegionOps notdirty_mem_ops
= {
1849 .write
= notdirty_mem_write
,
1850 .valid
.accepts
= notdirty_mem_accepts
,
1851 .endianness
= DEVICE_NATIVE_ENDIAN
,
1854 /* Generate a debug exception if a watchpoint has been hit. */
1855 static void check_watchpoint(int offset
, int len
, MemTxAttrs attrs
, int flags
)
1857 CPUState
*cpu
= current_cpu
;
1858 CPUArchState
*env
= cpu
->env_ptr
;
1859 target_ulong pc
, cs_base
;
1864 if (cpu
->watchpoint_hit
) {
1865 /* We re-entered the check after replacing the TB. Now raise
1866 * the debug interrupt so that it will trigger after the
1867 * current instruction. */
1868 cpu_interrupt(cpu
, CPU_INTERRUPT_DEBUG
);
1871 vaddr
= (cpu
->mem_io_vaddr
& TARGET_PAGE_MASK
) + offset
;
1872 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
1873 if (cpu_watchpoint_address_matches(wp
, vaddr
, len
)
1874 && (wp
->flags
& flags
)) {
1875 if (flags
== BP_MEM_READ
) {
1876 wp
->flags
|= BP_WATCHPOINT_HIT_READ
;
1878 wp
->flags
|= BP_WATCHPOINT_HIT_WRITE
;
1880 wp
->hitaddr
= vaddr
;
1881 wp
->hitattrs
= attrs
;
1882 if (!cpu
->watchpoint_hit
) {
1883 cpu
->watchpoint_hit
= wp
;
1884 tb_check_watchpoint(cpu
);
1885 if (wp
->flags
& BP_STOP_BEFORE_ACCESS
) {
1886 cpu
->exception_index
= EXCP_DEBUG
;
1889 cpu_get_tb_cpu_state(env
, &pc
, &cs_base
, &cpu_flags
);
1890 tb_gen_code(cpu
, pc
, cs_base
, cpu_flags
, 1);
1891 cpu_resume_from_signal(cpu
, NULL
);
1895 wp
->flags
&= ~BP_WATCHPOINT_HIT
;
1900 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1901 so these check for a hit then pass through to the normal out-of-line
1903 static MemTxResult
watch_mem_read(void *opaque
, hwaddr addr
, uint64_t *pdata
,
1904 unsigned size
, MemTxAttrs attrs
)
1909 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, attrs
, BP_MEM_READ
);
1912 data
= address_space_ldub(&address_space_memory
, addr
, attrs
, &res
);
1915 data
= address_space_lduw(&address_space_memory
, addr
, attrs
, &res
);
1918 data
= address_space_ldl(&address_space_memory
, addr
, attrs
, &res
);
1926 static MemTxResult
watch_mem_write(void *opaque
, hwaddr addr
,
1927 uint64_t val
, unsigned size
,
1932 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, attrs
, BP_MEM_WRITE
);
1935 address_space_stb(&address_space_memory
, addr
, val
, attrs
, &res
);
1938 address_space_stw(&address_space_memory
, addr
, val
, attrs
, &res
);
1941 address_space_stl(&address_space_memory
, addr
, val
, attrs
, &res
);
1948 static const MemoryRegionOps watch_mem_ops
= {
1949 .read_with_attrs
= watch_mem_read
,
1950 .write_with_attrs
= watch_mem_write
,
1951 .endianness
= DEVICE_NATIVE_ENDIAN
,
1954 static MemTxResult
subpage_read(void *opaque
, hwaddr addr
, uint64_t *data
,
1955 unsigned len
, MemTxAttrs attrs
)
1957 subpage_t
*subpage
= opaque
;
1961 #if defined(DEBUG_SUBPAGE)
1962 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
"\n", __func__
,
1963 subpage
, len
, addr
);
1965 res
= address_space_read(subpage
->as
, addr
+ subpage
->base
,
1972 *data
= ldub_p(buf
);
1975 *data
= lduw_p(buf
);
1988 static MemTxResult
subpage_write(void *opaque
, hwaddr addr
,
1989 uint64_t value
, unsigned len
, MemTxAttrs attrs
)
1991 subpage_t
*subpage
= opaque
;
1994 #if defined(DEBUG_SUBPAGE)
1995 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1996 " value %"PRIx64
"\n",
1997 __func__
, subpage
, len
, addr
, value
);
2015 return address_space_write(subpage
->as
, addr
+ subpage
->base
,
2019 static bool subpage_accepts(void *opaque
, hwaddr addr
,
2020 unsigned len
, bool is_write
)
2022 subpage_t
*subpage
= opaque
;
2023 #if defined(DEBUG_SUBPAGE)
2024 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx
"\n",
2025 __func__
, subpage
, is_write
? 'w' : 'r', len
, addr
);
2028 return address_space_access_valid(subpage
->as
, addr
+ subpage
->base
,
2032 static const MemoryRegionOps subpage_ops
= {
2033 .read_with_attrs
= subpage_read
,
2034 .write_with_attrs
= subpage_write
,
2035 .impl
.min_access_size
= 1,
2036 .impl
.max_access_size
= 8,
2037 .valid
.min_access_size
= 1,
2038 .valid
.max_access_size
= 8,
2039 .valid
.accepts
= subpage_accepts
,
2040 .endianness
= DEVICE_NATIVE_ENDIAN
,
2043 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
2048 if (start
>= TARGET_PAGE_SIZE
|| end
>= TARGET_PAGE_SIZE
)
2050 idx
= SUBPAGE_IDX(start
);
2051 eidx
= SUBPAGE_IDX(end
);
2052 #if defined(DEBUG_SUBPAGE)
2053 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2054 __func__
, mmio
, start
, end
, idx
, eidx
, section
);
2056 for (; idx
<= eidx
; idx
++) {
2057 mmio
->sub_section
[idx
] = section
;
2063 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
)
2067 mmio
= g_malloc0(sizeof(subpage_t
));
2071 memory_region_init_io(&mmio
->iomem
, NULL
, &subpage_ops
, mmio
,
2072 NULL
, TARGET_PAGE_SIZE
);
2073 mmio
->iomem
.subpage
= true;
2074 #if defined(DEBUG_SUBPAGE)
2075 printf("%s: %p base " TARGET_FMT_plx
" len %08x\n", __func__
,
2076 mmio
, base
, TARGET_PAGE_SIZE
);
2078 subpage_register(mmio
, 0, TARGET_PAGE_SIZE
-1, PHYS_SECTION_UNASSIGNED
);
2083 static uint16_t dummy_section(PhysPageMap
*map
, AddressSpace
*as
,
2087 MemoryRegionSection section
= {
2088 .address_space
= as
,
2090 .offset_within_address_space
= 0,
2091 .offset_within_region
= 0,
2092 .size
= int128_2_64(),
2095 return phys_section_add(map
, §ion
);
2098 MemoryRegion
*iotlb_to_region(CPUState
*cpu
, hwaddr index
)
2100 AddressSpaceDispatch
*d
= atomic_rcu_read(&cpu
->memory_dispatch
);
2101 MemoryRegionSection
*sections
= d
->map
.sections
;
2103 return sections
[index
& ~TARGET_PAGE_MASK
].mr
;
2106 static void io_mem_init(void)
2108 memory_region_init_io(&io_mem_rom
, NULL
, &unassigned_mem_ops
, NULL
, NULL
, UINT64_MAX
);
2109 memory_region_init_io(&io_mem_unassigned
, NULL
, &unassigned_mem_ops
, NULL
,
2111 memory_region_init_io(&io_mem_notdirty
, NULL
, ¬dirty_mem_ops
, NULL
,
2113 memory_region_init_io(&io_mem_watch
, NULL
, &watch_mem_ops
, NULL
,
2117 static void mem_begin(MemoryListener
*listener
)
2119 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2120 AddressSpaceDispatch
*d
= g_new0(AddressSpaceDispatch
, 1);
2123 n
= dummy_section(&d
->map
, as
, &io_mem_unassigned
);
2124 assert(n
== PHYS_SECTION_UNASSIGNED
);
2125 n
= dummy_section(&d
->map
, as
, &io_mem_notdirty
);
2126 assert(n
== PHYS_SECTION_NOTDIRTY
);
2127 n
= dummy_section(&d
->map
, as
, &io_mem_rom
);
2128 assert(n
== PHYS_SECTION_ROM
);
2129 n
= dummy_section(&d
->map
, as
, &io_mem_watch
);
2130 assert(n
== PHYS_SECTION_WATCH
);
2132 d
->phys_map
= (PhysPageEntry
) { .ptr
= PHYS_MAP_NODE_NIL
, .skip
= 1 };
2134 as
->next_dispatch
= d
;
2137 static void address_space_dispatch_free(AddressSpaceDispatch
*d
)
2139 phys_sections_free(&d
->map
);
2143 static void mem_commit(MemoryListener
*listener
)
2145 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2146 AddressSpaceDispatch
*cur
= as
->dispatch
;
2147 AddressSpaceDispatch
*next
= as
->next_dispatch
;
2149 phys_page_compact_all(next
, next
->map
.nodes_nb
);
2151 atomic_rcu_set(&as
->dispatch
, next
);
2153 call_rcu(cur
, address_space_dispatch_free
, rcu
);
2157 static void tcg_commit(MemoryListener
*listener
)
2161 /* since each CPU stores ram addresses in its TLB cache, we must
2162 reset the modified entries */
2165 /* FIXME: Disentangle the cpu.h circular files deps so we can
2166 directly get the right CPU from listener. */
2167 if (cpu
->tcg_as_listener
!= listener
) {
2170 cpu_reload_memory_map(cpu
);
2174 void address_space_init_dispatch(AddressSpace
*as
)
2176 as
->dispatch
= NULL
;
2177 as
->dispatch_listener
= (MemoryListener
) {
2179 .commit
= mem_commit
,
2180 .region_add
= mem_add
,
2181 .region_nop
= mem_add
,
2184 memory_listener_register(&as
->dispatch_listener
, as
);
2187 void address_space_unregister(AddressSpace
*as
)
2189 memory_listener_unregister(&as
->dispatch_listener
);
2192 void address_space_destroy_dispatch(AddressSpace
*as
)
2194 AddressSpaceDispatch
*d
= as
->dispatch
;
2196 atomic_rcu_set(&as
->dispatch
, NULL
);
2198 call_rcu(d
, address_space_dispatch_free
, rcu
);
2202 static void memory_map_init(void)
2204 system_memory
= g_malloc(sizeof(*system_memory
));
2206 memory_region_init(system_memory
, NULL
, "system", UINT64_MAX
);
2207 address_space_init(&address_space_memory
, system_memory
, "memory");
2209 system_io
= g_malloc(sizeof(*system_io
));
2210 memory_region_init_io(system_io
, NULL
, &unassigned_io_ops
, NULL
, "io",
2212 address_space_init(&address_space_io
, system_io
, "I/O");
2215 MemoryRegion
*get_system_memory(void)
2217 return system_memory
;
2220 MemoryRegion
*get_system_io(void)
2225 #endif /* !defined(CONFIG_USER_ONLY) */
2227 /* physical memory access (slow version, mainly for debug) */
2228 #if defined(CONFIG_USER_ONLY)
2229 int cpu_memory_rw_debug(CPUState
*cpu
, target_ulong addr
,
2230 uint8_t *buf
, int len
, int is_write
)
2237 page
= addr
& TARGET_PAGE_MASK
;
2238 l
= (page
+ TARGET_PAGE_SIZE
) - addr
;
2241 flags
= page_get_flags(page
);
2242 if (!(flags
& PAGE_VALID
))
2245 if (!(flags
& PAGE_WRITE
))
2247 /* XXX: this code should not depend on lock_user */
2248 if (!(p
= lock_user(VERIFY_WRITE
, addr
, l
, 0)))
2251 unlock_user(p
, addr
, l
);
2253 if (!(flags
& PAGE_READ
))
2255 /* XXX: this code should not depend on lock_user */
2256 if (!(p
= lock_user(VERIFY_READ
, addr
, l
, 1)))
2259 unlock_user(p
, addr
, 0);
2270 static void invalidate_and_set_dirty(MemoryRegion
*mr
, hwaddr addr
,
2273 uint8_t dirty_log_mask
= memory_region_get_dirty_log_mask(mr
);
2274 /* No early return if dirty_log_mask is or becomes 0, because
2275 * cpu_physical_memory_set_dirty_range will still call
2276 * xen_modified_memory.
2278 if (dirty_log_mask
) {
2280 cpu_physical_memory_range_includes_clean(addr
, length
, dirty_log_mask
);
2282 if (dirty_log_mask
& (1 << DIRTY_MEMORY_CODE
)) {
2283 tb_invalidate_phys_range(addr
, addr
+ length
);
2284 dirty_log_mask
&= ~(1 << DIRTY_MEMORY_CODE
);
2286 cpu_physical_memory_set_dirty_range(addr
, length
, dirty_log_mask
);
2289 static int memory_access_size(MemoryRegion
*mr
, unsigned l
, hwaddr addr
)
2291 unsigned access_size_max
= mr
->ops
->valid
.max_access_size
;
2293 /* Regions are assumed to support 1-4 byte accesses unless
2294 otherwise specified. */
2295 if (access_size_max
== 0) {
2296 access_size_max
= 4;
2299 /* Bound the maximum access by the alignment of the address. */
2300 if (!mr
->ops
->impl
.unaligned
) {
2301 unsigned align_size_max
= addr
& -addr
;
2302 if (align_size_max
!= 0 && align_size_max
< access_size_max
) {
2303 access_size_max
= align_size_max
;
2307 /* Don't attempt accesses larger than the maximum. */
2308 if (l
> access_size_max
) {
2309 l
= access_size_max
;
2312 l
= 1 << (qemu_fls(l
) - 1);
2318 MemTxResult
address_space_rw(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2319 uint8_t *buf
, int len
, bool is_write
)
2326 MemTxResult result
= MEMTX_OK
;
2331 mr
= address_space_translate(as
, addr
, &addr1
, &l
, is_write
);
2334 if (!memory_access_is_direct(mr
, is_write
)) {
2335 l
= memory_access_size(mr
, l
, addr1
);
2336 /* XXX: could force current_cpu to NULL to avoid
2340 /* 64 bit write access */
2342 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 8,
2346 /* 32 bit write access */
2348 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 4,
2352 /* 16 bit write access */
2354 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 2,
2358 /* 8 bit write access */
2360 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 1,
2367 addr1
+= memory_region_get_ram_addr(mr
);
2369 ptr
= qemu_get_ram_ptr(addr1
);
2370 memcpy(ptr
, buf
, l
);
2371 invalidate_and_set_dirty(mr
, addr1
, l
);
2374 if (!memory_access_is_direct(mr
, is_write
)) {
2376 l
= memory_access_size(mr
, l
, addr1
);
2379 /* 64 bit read access */
2380 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 8,
2385 /* 32 bit read access */
2386 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 4,
2391 /* 16 bit read access */
2392 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 2,
2397 /* 8 bit read access */
2398 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 1,
2407 ptr
= qemu_get_ram_ptr(mr
->ram_addr
+ addr1
);
2408 memcpy(buf
, ptr
, l
);
2420 MemTxResult
address_space_write(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2421 const uint8_t *buf
, int len
)
2423 return address_space_rw(as
, addr
, attrs
, (uint8_t *)buf
, len
, true);
2426 MemTxResult
address_space_read(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2427 uint8_t *buf
, int len
)
2429 return address_space_rw(as
, addr
, attrs
, buf
, len
, false);
2433 void cpu_physical_memory_rw(hwaddr addr
, uint8_t *buf
,
2434 int len
, int is_write
)
2436 address_space_rw(&address_space_memory
, addr
, MEMTXATTRS_UNSPECIFIED
,
2437 buf
, len
, is_write
);
2440 enum write_rom_type
{
2445 static inline void cpu_physical_memory_write_rom_internal(AddressSpace
*as
,
2446 hwaddr addr
, const uint8_t *buf
, int len
, enum write_rom_type type
)
2456 mr
= address_space_translate(as
, addr
, &addr1
, &l
, true);
2458 if (!(memory_region_is_ram(mr
) ||
2459 memory_region_is_romd(mr
))) {
2462 addr1
+= memory_region_get_ram_addr(mr
);
2464 ptr
= qemu_get_ram_ptr(addr1
);
2467 memcpy(ptr
, buf
, l
);
2468 invalidate_and_set_dirty(mr
, addr1
, l
);
2471 flush_icache_range((uintptr_t)ptr
, (uintptr_t)ptr
+ l
);
2482 /* used for ROM loading : can write in RAM and ROM */
2483 void cpu_physical_memory_write_rom(AddressSpace
*as
, hwaddr addr
,
2484 const uint8_t *buf
, int len
)
2486 cpu_physical_memory_write_rom_internal(as
, addr
, buf
, len
, WRITE_DATA
);
2489 void cpu_flush_icache_range(hwaddr start
, int len
)
2492 * This function should do the same thing as an icache flush that was
2493 * triggered from within the guest. For TCG we are always cache coherent,
2494 * so there is no need to flush anything. For KVM / Xen we need to flush
2495 * the host's instruction cache at least.
2497 if (tcg_enabled()) {
2501 cpu_physical_memory_write_rom_internal(&address_space_memory
,
2502 start
, NULL
, len
, FLUSH_CACHE
);
2513 static BounceBuffer bounce
;
2515 typedef struct MapClient
{
2517 QLIST_ENTRY(MapClient
) link
;
2520 QemuMutex map_client_list_lock
;
2521 static QLIST_HEAD(map_client_list
, MapClient
) map_client_list
2522 = QLIST_HEAD_INITIALIZER(map_client_list
);
2524 static void cpu_unregister_map_client_do(MapClient
*client
)
2526 QLIST_REMOVE(client
, link
);
2530 static void cpu_notify_map_clients_locked(void)
2534 while (!QLIST_EMPTY(&map_client_list
)) {
2535 client
= QLIST_FIRST(&map_client_list
);
2536 qemu_bh_schedule(client
->bh
);
2537 cpu_unregister_map_client_do(client
);
2541 void cpu_register_map_client(QEMUBH
*bh
)
2543 MapClient
*client
= g_malloc(sizeof(*client
));
2545 qemu_mutex_lock(&map_client_list_lock
);
2547 QLIST_INSERT_HEAD(&map_client_list
, client
, link
);
2548 if (!atomic_read(&bounce
.in_use
)) {
2549 cpu_notify_map_clients_locked();
2551 qemu_mutex_unlock(&map_client_list_lock
);
2554 void cpu_exec_init_all(void)
2556 qemu_mutex_init(&ram_list
.mutex
);
2559 qemu_mutex_init(&map_client_list_lock
);
2562 void cpu_unregister_map_client(QEMUBH
*bh
)
2566 qemu_mutex_lock(&map_client_list_lock
);
2567 QLIST_FOREACH(client
, &map_client_list
, link
) {
2568 if (client
->bh
== bh
) {
2569 cpu_unregister_map_client_do(client
);
2573 qemu_mutex_unlock(&map_client_list_lock
);
2576 static void cpu_notify_map_clients(void)
2578 qemu_mutex_lock(&map_client_list_lock
);
2579 cpu_notify_map_clients_locked();
2580 qemu_mutex_unlock(&map_client_list_lock
);
2583 bool address_space_access_valid(AddressSpace
*as
, hwaddr addr
, int len
, bool is_write
)
2591 mr
= address_space_translate(as
, addr
, &xlat
, &l
, is_write
);
2592 if (!memory_access_is_direct(mr
, is_write
)) {
2593 l
= memory_access_size(mr
, l
, addr
);
2594 if (!memory_region_access_valid(mr
, xlat
, l
, is_write
)) {
2606 /* Map a physical memory region into a host virtual address.
2607 * May map a subset of the requested range, given by and returned in *plen.
2608 * May return NULL if resources needed to perform the mapping are exhausted.
2609 * Use only for reads OR writes - not for read-modify-write operations.
2610 * Use cpu_register_map_client() to know when retrying the map operation is
2611 * likely to succeed.
2613 void *address_space_map(AddressSpace
*as
,
2620 hwaddr l
, xlat
, base
;
2621 MemoryRegion
*mr
, *this_mr
;
2630 mr
= address_space_translate(as
, addr
, &xlat
, &l
, is_write
);
2632 if (!memory_access_is_direct(mr
, is_write
)) {
2633 if (atomic_xchg(&bounce
.in_use
, true)) {
2637 /* Avoid unbounded allocations */
2638 l
= MIN(l
, TARGET_PAGE_SIZE
);
2639 bounce
.buffer
= qemu_memalign(TARGET_PAGE_SIZE
, l
);
2643 memory_region_ref(mr
);
2646 address_space_read(as
, addr
, MEMTXATTRS_UNSPECIFIED
,
2652 return bounce
.buffer
;
2656 raddr
= memory_region_get_ram_addr(mr
);
2667 this_mr
= address_space_translate(as
, addr
, &xlat
, &l
, is_write
);
2668 if (this_mr
!= mr
|| xlat
!= base
+ done
) {
2673 memory_region_ref(mr
);
2676 return qemu_ram_ptr_length(raddr
+ base
, plen
);
2679 /* Unmaps a memory region previously mapped by address_space_map().
2680 * Will also mark the memory as dirty if is_write == 1. access_len gives
2681 * the amount of memory that was actually read or written by the caller.
2683 void address_space_unmap(AddressSpace
*as
, void *buffer
, hwaddr len
,
2684 int is_write
, hwaddr access_len
)
2686 if (buffer
!= bounce
.buffer
) {
2690 mr
= qemu_ram_addr_from_host(buffer
, &addr1
);
2693 invalidate_and_set_dirty(mr
, addr1
, access_len
);
2695 if (xen_enabled()) {
2696 xen_invalidate_map_cache_entry(buffer
);
2698 memory_region_unref(mr
);
2702 address_space_write(as
, bounce
.addr
, MEMTXATTRS_UNSPECIFIED
,
2703 bounce
.buffer
, access_len
);
2705 qemu_vfree(bounce
.buffer
);
2706 bounce
.buffer
= NULL
;
2707 memory_region_unref(bounce
.mr
);
2708 atomic_mb_set(&bounce
.in_use
, false);
2709 cpu_notify_map_clients();
2712 void *cpu_physical_memory_map(hwaddr addr
,
2716 return address_space_map(&address_space_memory
, addr
, plen
, is_write
);
2719 void cpu_physical_memory_unmap(void *buffer
, hwaddr len
,
2720 int is_write
, hwaddr access_len
)
2722 return address_space_unmap(&address_space_memory
, buffer
, len
, is_write
, access_len
);
2725 /* warning: addr must be aligned */
2726 static inline uint32_t address_space_ldl_internal(AddressSpace
*as
, hwaddr addr
,
2728 MemTxResult
*result
,
2729 enum device_endian endian
)
2739 mr
= address_space_translate(as
, addr
, &addr1
, &l
, false);
2740 if (l
< 4 || !memory_access_is_direct(mr
, false)) {
2742 r
= memory_region_dispatch_read(mr
, addr1
, &val
, 4, attrs
);
2743 #if defined(TARGET_WORDS_BIGENDIAN)
2744 if (endian
== DEVICE_LITTLE_ENDIAN
) {
2748 if (endian
== DEVICE_BIG_ENDIAN
) {
2754 ptr
= qemu_get_ram_ptr((memory_region_get_ram_addr(mr
)
2758 case DEVICE_LITTLE_ENDIAN
:
2759 val
= ldl_le_p(ptr
);
2761 case DEVICE_BIG_ENDIAN
:
2762 val
= ldl_be_p(ptr
);
2777 uint32_t address_space_ldl(AddressSpace
*as
, hwaddr addr
,
2778 MemTxAttrs attrs
, MemTxResult
*result
)
2780 return address_space_ldl_internal(as
, addr
, attrs
, result
,
2781 DEVICE_NATIVE_ENDIAN
);
2784 uint32_t address_space_ldl_le(AddressSpace
*as
, hwaddr addr
,
2785 MemTxAttrs attrs
, MemTxResult
*result
)
2787 return address_space_ldl_internal(as
, addr
, attrs
, result
,
2788 DEVICE_LITTLE_ENDIAN
);
2791 uint32_t address_space_ldl_be(AddressSpace
*as
, hwaddr addr
,
2792 MemTxAttrs attrs
, MemTxResult
*result
)
2794 return address_space_ldl_internal(as
, addr
, attrs
, result
,
2798 uint32_t ldl_phys(AddressSpace
*as
, hwaddr addr
)
2800 return address_space_ldl(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2803 uint32_t ldl_le_phys(AddressSpace
*as
, hwaddr addr
)
2805 return address_space_ldl_le(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2808 uint32_t ldl_be_phys(AddressSpace
*as
, hwaddr addr
)
2810 return address_space_ldl_be(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2813 /* warning: addr must be aligned */
2814 static inline uint64_t address_space_ldq_internal(AddressSpace
*as
, hwaddr addr
,
2816 MemTxResult
*result
,
2817 enum device_endian endian
)
2827 mr
= address_space_translate(as
, addr
, &addr1
, &l
,
2829 if (l
< 8 || !memory_access_is_direct(mr
, false)) {
2831 r
= memory_region_dispatch_read(mr
, addr1
, &val
, 8, attrs
);
2832 #if defined(TARGET_WORDS_BIGENDIAN)
2833 if (endian
== DEVICE_LITTLE_ENDIAN
) {
2837 if (endian
== DEVICE_BIG_ENDIAN
) {
2843 ptr
= qemu_get_ram_ptr((memory_region_get_ram_addr(mr
)
2847 case DEVICE_LITTLE_ENDIAN
:
2848 val
= ldq_le_p(ptr
);
2850 case DEVICE_BIG_ENDIAN
:
2851 val
= ldq_be_p(ptr
);
2866 uint64_t address_space_ldq(AddressSpace
*as
, hwaddr addr
,
2867 MemTxAttrs attrs
, MemTxResult
*result
)
2869 return address_space_ldq_internal(as
, addr
, attrs
, result
,
2870 DEVICE_NATIVE_ENDIAN
);
2873 uint64_t address_space_ldq_le(AddressSpace
*as
, hwaddr addr
,
2874 MemTxAttrs attrs
, MemTxResult
*result
)
2876 return address_space_ldq_internal(as
, addr
, attrs
, result
,
2877 DEVICE_LITTLE_ENDIAN
);
2880 uint64_t address_space_ldq_be(AddressSpace
*as
, hwaddr addr
,
2881 MemTxAttrs attrs
, MemTxResult
*result
)
2883 return address_space_ldq_internal(as
, addr
, attrs
, result
,
2887 uint64_t ldq_phys(AddressSpace
*as
, hwaddr addr
)
2889 return address_space_ldq(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2892 uint64_t ldq_le_phys(AddressSpace
*as
, hwaddr addr
)
2894 return address_space_ldq_le(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2897 uint64_t ldq_be_phys(AddressSpace
*as
, hwaddr addr
)
2899 return address_space_ldq_be(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2903 uint32_t address_space_ldub(AddressSpace
*as
, hwaddr addr
,
2904 MemTxAttrs attrs
, MemTxResult
*result
)
2909 r
= address_space_rw(as
, addr
, attrs
, &val
, 1, 0);
2916 uint32_t ldub_phys(AddressSpace
*as
, hwaddr addr
)
2918 return address_space_ldub(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
2921 /* warning: addr must be aligned */
2922 static inline uint32_t address_space_lduw_internal(AddressSpace
*as
,
2925 MemTxResult
*result
,
2926 enum device_endian endian
)
2936 mr
= address_space_translate(as
, addr
, &addr1
, &l
,
2938 if (l
< 2 || !memory_access_is_direct(mr
, false)) {
2940 r
= memory_region_dispatch_read(mr
, addr1
, &val
, 2, attrs
);
2941 #if defined(TARGET_WORDS_BIGENDIAN)
2942 if (endian
== DEVICE_LITTLE_ENDIAN
) {
2946 if (endian
== DEVICE_BIG_ENDIAN
) {
2952 ptr
= qemu_get_ram_ptr((memory_region_get_ram_addr(mr
)
2956 case DEVICE_LITTLE_ENDIAN
:
2957 val
= lduw_le_p(ptr
);
2959 case DEVICE_BIG_ENDIAN
:
2960 val
= lduw_be_p(ptr
);
2975 uint32_t address_space_lduw(AddressSpace
*as
, hwaddr addr
,
2976 MemTxAttrs attrs
, MemTxResult
*result
)
2978 return address_space_lduw_internal(as
, addr
, attrs
, result
,
2979 DEVICE_NATIVE_ENDIAN
);
2982 uint32_t address_space_lduw_le(AddressSpace
*as
, hwaddr addr
,
2983 MemTxAttrs attrs
, MemTxResult
*result
)
2985 return address_space_lduw_internal(as
, addr
, attrs
, result
,
2986 DEVICE_LITTLE_ENDIAN
);
2989 uint32_t address_space_lduw_be(AddressSpace
*as
, hwaddr addr
,
2990 MemTxAttrs attrs
, MemTxResult
*result
)
2992 return address_space_lduw_internal(as
, addr
, attrs
, result
,
2996 uint32_t lduw_phys(AddressSpace
*as
, hwaddr addr
)
2998 return address_space_lduw(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3001 uint32_t lduw_le_phys(AddressSpace
*as
, hwaddr addr
)
3003 return address_space_lduw_le(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3006 uint32_t lduw_be_phys(AddressSpace
*as
, hwaddr addr
)
3008 return address_space_lduw_be(as
, addr
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3011 /* warning: addr must be aligned. The ram page is not masked as dirty
3012 and the code inside is not invalidated. It is useful if the dirty
3013 bits are used to track modified PTEs */
3014 void address_space_stl_notdirty(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3015 MemTxAttrs attrs
, MemTxResult
*result
)
3022 uint8_t dirty_log_mask
;
3025 mr
= address_space_translate(as
, addr
, &addr1
, &l
,
3027 if (l
< 4 || !memory_access_is_direct(mr
, true)) {
3028 r
= memory_region_dispatch_write(mr
, addr1
, val
, 4, attrs
);
3030 addr1
+= memory_region_get_ram_addr(mr
) & TARGET_PAGE_MASK
;
3031 ptr
= qemu_get_ram_ptr(addr1
);
3034 dirty_log_mask
= memory_region_get_dirty_log_mask(mr
);
3035 dirty_log_mask
&= ~(1 << DIRTY_MEMORY_CODE
);
3036 cpu_physical_memory_set_dirty_range(addr1
, 4, dirty_log_mask
);
3045 void stl_phys_notdirty(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3047 address_space_stl_notdirty(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3050 /* warning: addr must be aligned */
3051 static inline void address_space_stl_internal(AddressSpace
*as
,
3052 hwaddr addr
, uint32_t val
,
3054 MemTxResult
*result
,
3055 enum device_endian endian
)
3064 mr
= address_space_translate(as
, addr
, &addr1
, &l
,
3066 if (l
< 4 || !memory_access_is_direct(mr
, true)) {
3067 #if defined(TARGET_WORDS_BIGENDIAN)
3068 if (endian
== DEVICE_LITTLE_ENDIAN
) {
3072 if (endian
== DEVICE_BIG_ENDIAN
) {
3076 r
= memory_region_dispatch_write(mr
, addr1
, val
, 4, attrs
);
3079 addr1
+= memory_region_get_ram_addr(mr
) & TARGET_PAGE_MASK
;
3080 ptr
= qemu_get_ram_ptr(addr1
);
3082 case DEVICE_LITTLE_ENDIAN
:
3085 case DEVICE_BIG_ENDIAN
:
3092 invalidate_and_set_dirty(mr
, addr1
, 4);
3101 void address_space_stl(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3102 MemTxAttrs attrs
, MemTxResult
*result
)
3104 address_space_stl_internal(as
, addr
, val
, attrs
, result
,
3105 DEVICE_NATIVE_ENDIAN
);
3108 void address_space_stl_le(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3109 MemTxAttrs attrs
, MemTxResult
*result
)
3111 address_space_stl_internal(as
, addr
, val
, attrs
, result
,
3112 DEVICE_LITTLE_ENDIAN
);
3115 void address_space_stl_be(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3116 MemTxAttrs attrs
, MemTxResult
*result
)
3118 address_space_stl_internal(as
, addr
, val
, attrs
, result
,
3122 void stl_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3124 address_space_stl(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3127 void stl_le_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3129 address_space_stl_le(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3132 void stl_be_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3134 address_space_stl_be(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3138 void address_space_stb(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3139 MemTxAttrs attrs
, MemTxResult
*result
)
3144 r
= address_space_rw(as
, addr
, attrs
, &v
, 1, 1);
3150 void stb_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3152 address_space_stb(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3155 /* warning: addr must be aligned */
3156 static inline void address_space_stw_internal(AddressSpace
*as
,
3157 hwaddr addr
, uint32_t val
,
3159 MemTxResult
*result
,
3160 enum device_endian endian
)
3169 mr
= address_space_translate(as
, addr
, &addr1
, &l
, true);
3170 if (l
< 2 || !memory_access_is_direct(mr
, true)) {
3171 #if defined(TARGET_WORDS_BIGENDIAN)
3172 if (endian
== DEVICE_LITTLE_ENDIAN
) {
3176 if (endian
== DEVICE_BIG_ENDIAN
) {
3180 r
= memory_region_dispatch_write(mr
, addr1
, val
, 2, attrs
);
3183 addr1
+= memory_region_get_ram_addr(mr
) & TARGET_PAGE_MASK
;
3184 ptr
= qemu_get_ram_ptr(addr1
);
3186 case DEVICE_LITTLE_ENDIAN
:
3189 case DEVICE_BIG_ENDIAN
:
3196 invalidate_and_set_dirty(mr
, addr1
, 2);
3205 void address_space_stw(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3206 MemTxAttrs attrs
, MemTxResult
*result
)
3208 address_space_stw_internal(as
, addr
, val
, attrs
, result
,
3209 DEVICE_NATIVE_ENDIAN
);
3212 void address_space_stw_le(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3213 MemTxAttrs attrs
, MemTxResult
*result
)
3215 address_space_stw_internal(as
, addr
, val
, attrs
, result
,
3216 DEVICE_LITTLE_ENDIAN
);
3219 void address_space_stw_be(AddressSpace
*as
, hwaddr addr
, uint32_t val
,
3220 MemTxAttrs attrs
, MemTxResult
*result
)
3222 address_space_stw_internal(as
, addr
, val
, attrs
, result
,
3226 void stw_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3228 address_space_stw(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3231 void stw_le_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3233 address_space_stw_le(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3236 void stw_be_phys(AddressSpace
*as
, hwaddr addr
, uint32_t val
)
3238 address_space_stw_be(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3242 void address_space_stq(AddressSpace
*as
, hwaddr addr
, uint64_t val
,
3243 MemTxAttrs attrs
, MemTxResult
*result
)
3247 r
= address_space_rw(as
, addr
, attrs
, (void *) &val
, 8, 1);
3253 void address_space_stq_le(AddressSpace
*as
, hwaddr addr
, uint64_t val
,
3254 MemTxAttrs attrs
, MemTxResult
*result
)
3257 val
= cpu_to_le64(val
);
3258 r
= address_space_rw(as
, addr
, attrs
, (void *) &val
, 8, 1);
3263 void address_space_stq_be(AddressSpace
*as
, hwaddr addr
, uint64_t val
,
3264 MemTxAttrs attrs
, MemTxResult
*result
)
3267 val
= cpu_to_be64(val
);
3268 r
= address_space_rw(as
, addr
, attrs
, (void *) &val
, 8, 1);
3274 void stq_phys(AddressSpace
*as
, hwaddr addr
, uint64_t val
)
3276 address_space_stq(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3279 void stq_le_phys(AddressSpace
*as
, hwaddr addr
, uint64_t val
)
3281 address_space_stq_le(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3284 void stq_be_phys(AddressSpace
*as
, hwaddr addr
, uint64_t val
)
3286 address_space_stq_be(as
, addr
, val
, MEMTXATTRS_UNSPECIFIED
, NULL
);
3289 /* virtual memory access for debug (includes writing to ROM) */
3290 int cpu_memory_rw_debug(CPUState
*cpu
, target_ulong addr
,
3291 uint8_t *buf
, int len
, int is_write
)
3298 page
= addr
& TARGET_PAGE_MASK
;
3299 phys_addr
= cpu_get_phys_page_debug(cpu
, page
);
3300 /* if no physical page mapped, return an error */
3301 if (phys_addr
== -1)
3303 l
= (page
+ TARGET_PAGE_SIZE
) - addr
;
3306 phys_addr
+= (addr
& ~TARGET_PAGE_MASK
);
3308 cpu_physical_memory_write_rom(cpu
->as
, phys_addr
, buf
, l
);
3310 address_space_rw(cpu
->as
, phys_addr
, MEMTXATTRS_UNSPECIFIED
,
3322 * A helper function for the _utterly broken_ virtio device model to find out if
3323 * it's running on a big endian machine. Don't do this at home kids!
bool target_words_bigendian(void);
/* Returns true iff this file was built with TARGET_WORDS_BIGENDIAN defined. */
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3335 #ifndef CONFIG_USER_ONLY
3336 bool cpu_physical_memory_is_io(hwaddr phys_addr
)
3343 mr
= address_space_translate(&address_space_memory
,
3344 phys_addr
, &phys_addr
, &l
, false);
3346 res
= !(memory_region_is_ram(mr
) || memory_region_is_romd(mr
));
3351 void qemu_ram_foreach_block(RAMBlockIterFunc func
, void *opaque
)
3356 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
3357 func(block
->host
, block
->offset
, block
->used_length
, opaque
);